#!/usr/bin/python3
# -*- coding: utf-8 -*-
# Modules
import sys
import os
import sqlite3
import csv
import re
import syslog
import time
# Configurable stuff
# Other globals
progname = sys.argv[0].rpartition("/")[2]
del sys.argv[0]
syslog_opened_flag = False
errors_bubble_up_flag = False
DEVNULL = open(os.devnull, 'wb')
db_file = None
#db_file = '%s.sqlite' % (progname)
use_syslog_flag = False
waypoint_name_regex = '^camp(?:[0-9]{2,}|XXX)[*+]{0,2}$'
float_regex = '^[-+]?[0-9]+(?:\.[0-9]+)?$'
campsite_longitude_regex = float_regex
campsite_latitude_regex = float_regex
campsite_country_regex = '^(?:AT|CZ|CH|DE|SK|DK|HU|SL|HR|PL)$'
letters = 'a-zA-ZáéřýőíüÉóéÖúžěöčľÜßČšŽŠäåňÅůŘřøæØťÍďÚ´ôżęłŻĺňűŐąšěÝńś'
campsite_name_regex = '^(?:[-%s 0-9/.()&+:"\']{3,}|[-?])$' % (letters)
campsite_city_regex = '^(?:[-%s 0-9/.()]{2,}|[-?])$' % (letters)
campsite_street_regex = '^(?:[-%s 0-9/.&]{5,}|[1-9][/0-9]*|[-?])$' % (letters)
tel_regex = '\+[1-9][0-9]{1,2} [1-9][0-9 ]*[0-9]'
campsite_tel_regex = '^(?:%s(?: or %s)*|\?|-)$' % (tel_regex, tel_regex)
campsite_tents_ok_regex = '^(?:yes|no|\?)$'
campsite_season_regex = '^((|mid)\d+|\?)-((|mid)\d+|\?)(|,((|mid)\d+|\?)-((|mid)\d+|\?))$'
campsite_season_regex = '^(?:(?:|mid)\d+|\?)-(?:(?:|mid)\d+|\?)$'
campsite_url_regex = '^(https?://[-a-zäöüA-Z0-9/.?&=_%,#!:@+]+|\?|-)$'
# Classes
# Functions
def main():
global verboselevel, DEVNULL, cursor, db_file
# Defaults for options
verboselevel = 2
output_fmt = None
tolerance = 0.0
exit_on_mismatch_flag = True
# Process options, old school
while True:
if len(sys.argv) == 0:
break
# Application-specific options
elif sys.argv[0] == '--no-exit-on-mismatch':
exit_on_mismatch_flag = False
elif sys.argv[0] == '--exit-on-mismatch':
exit_on_mismatch_flag = True
elif sys.argv[0].startswith('--output-format='):
output_fmt = sys.argv[0][len('--output-format='):]
if output_fmt not in ['null', 'gpx', 'html', 'kml']:
usage()
elif sys.argv[0].startswith('--tolerance='):
tolerance = float(sys.argv[0][len('--tolerance='):])
# '-' is an argument not an option
elif sys.argv[0] == '-':
break
# Standard options
elif sys.argv[0].startswith('--debug='):
verboselevel = sys.argv[0][len('--debug='):]
if not re.search('^\d+$', verboselevel):
usage()
verboselevel = int(verboselevel)
elif sys.argv[0] == '--verbose':
verboselevel = 3
elif sys.argv[0] == '--help':
usage(0)
elif sys.argv[0] == '-v':
verboselevel = 3
elif sys.argv[0] == '-d':
if len(sys.argv) < 3:
usage()
verboselevel = sys.argv[1]
del sys.argv[0]
if not re.search('^\d+$', verboselevel):
usage()
verboselevel = int(verboselevel)
elif sys.argv[0] == '-h':
usage(0)
elif sys.argv[0] == '--':
del sys.argv[0]
break
elif sys.argv[0].startswith('-'):
usage()
else:
break
del sys.argv[0]
# Process arguments
input_files = sys.argv if len(sys.argv) > 0 else ['-']
debug(10, 'main: input_files=%s' % (input_files))
# Sanity checks and derivations
if output_fmt is None:
usage()
if db_file is None:
database_exists_flag = False
elif os.path.isfile(db_file):
database_exists_flag = True
else:
database_exists_flag = False
# Guts
# Connect to database
sqlite3.enable_callback_tracebacks(True)
conn = sqlite3.connect(db_file if db_file is not None else ':memory:')
# I want to control transactions myself
conn.isolation_level = None
# Create REGEXP function. (REGEXP operator maps to REGEXP function automatically)
def regexp(expr, item):
#debug(10, 'regexp: expr=%s, item=%s' % (expr, item))
reg = re.compile(expr)
return reg.search(item) is not None
conn.create_function("REGEXP", 2, regexp)
cursor = conn.cursor()
# Intialise database if necessary
if not database_exists_flag:
sqlite_exec_wrapper('CREATE TABLE campsites (input_file char, input_line int, waypoint_name char, campsite_latitude char, campsite_longitude char, campsite_name char, campsite_country char, campsite_city char, campsite_street char, campsite_tel char, campsite_tents_ok char, campsite_season char, campsite_url char, comment char, extra_col_1, extra_col_2, extra_col_3, PRIMARY KEY(input_file, input_line));')
# Slurp input with filenames and line numbers (useful for error messages)
sqlite_exec_wrapper('BEGIN TRANSACTION;')
for input_file in input_files:
if input_file == '-':
fh = sys.stdin
else:
fh = open(input_file, 'r')
input_line = 0
for row in csv.reader(fh):
input_line += 1
debug(10, 'main: file=%s, line=%d' % (input_file, input_line))
(waypt_name, campsite_latitude, campsite_longitude, campsite_name, campsite_country, campsite_city, campsite_street, campsite_tel, campsite_tents_ok, campsite_season, campsite_url, comment, extra_col_1, extra_col_2, extra_col_3) = row[5:]
# Ignore all non-camp rows
if not 'camp' in waypt_name:
continue
# Ignore knowingly duplicated camps.
if 'see above' in campsite_name:
continue
debug(10, 'main: waypt_name=%s, campsite_latitude=%s, campsite_longitude=%s, campsite_name=%s, campsite_country=%s, campsite_city=%s, campsite_street=%s, campsite_tel=%s, campsite_tents_ok=%s, campsite_season=%s, campsite_url=%s, comment=%s, extra_col_1=%s, extra_col_2=%s, extra_col_3=%s' % (waypt_name, campsite_latitude, campsite_longitude, campsite_name, campsite_country, campsite_city, campsite_street, campsite_tel, campsite_tents_ok, campsite_season, campsite_url, comment, extra_col_1, extra_col_2, extra_col_3))
sqlite_exec_wrapper('INSERT INTO campsites VALUES (\'%s\', %d, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s);' % (input_file, input_line, sql_stringify(waypt_name), sql_stringify(campsite_latitude), sql_stringify(campsite_longitude), sql_stringify(campsite_name), sql_stringify(campsite_country), sql_stringify(campsite_city), sql_stringify(campsite_street), sql_stringify(campsite_tel), sql_stringify(campsite_tents_ok), sql_stringify(campsite_season), sql_stringify(campsite_url), sql_stringify(comment), sql_stringify(extra_col_1), sql_stringify(extra_col_2), sql_stringify(extra_col_3)))
if input_file != '-':
fh.close()
sqlite_exec_wrapper('END TRANSACTION;')
debug(5, 'main: checking syntax ...')
# Report badly formatted values.
had_badly_formatted_value_flag = False
check_param_sets = [
{ 'col':'waypoint_name', 'regex':waypoint_name_regex },
{ 'col':'campsite_latitude', 'regex':campsite_latitude_regex },
{ 'col':'campsite_longitude', 'regex':campsite_longitude_regex },
{ 'col':'campsite_name', 'regex':campsite_name_regex },
{ 'col':'campsite_country', 'regex':campsite_country_regex },
{ 'col':'campsite_city', 'regex':campsite_city_regex },
{ 'col':'campsite_street', 'regex':campsite_street_regex },
{ 'col':'campsite_tel', 'regex':campsite_tel_regex },
{ 'col':'campsite_tents_ok', 'regex':campsite_tents_ok_regex },
{ 'col':'campsite_season', 'regex':campsite_season_regex },
{ 'col':'campsite_url', 'regex':campsite_url_regex },
]
for check_param_set in check_param_sets:
# Check first for NULL (else regexp check will bawk)
sql_statement = 'SELECT input_file, input_line as error FROM campsites WHERE %s IS NULL ORDER BY input_file, input_line;' % (check_param_set['col'])
for tuple in sqlite_exec_wrapper(sql_statement):
(input_file, input_line) = tuple
sys.stderr.write('%-20s %s\n' % ('%s:%s:' % (input_file, input_line), 'empty %s' % (check_param_set['col'])))
had_badly_formatted_value_flag = True
# Check second for (not-NULL and) regexp mismatches
sql_statement = 'SELECT input_file, input_line, %s as whatever FROM campsites WHERE %s IS NOT NULL AND NOT %s REGEXP %s ORDER BY input_file, input_line;' % (check_param_set['col'], check_param_set['col'],check_param_set['col'], sql_stringify(check_param_set['regex']))
for tuple in sqlite_exec_wrapper(sql_statement):
(input_file, input_line, whatever) = tuple
sys.stderr.write('%-20s %s\n' % ('%s:%d:' % (input_file, input_line), '%s: bad %s' % (whatever, check_param_set['col'])))
had_badly_formatted_value_flag = True
# On error skip remaining checks.
if exit_on_mismatch_flag and had_badly_formatted_value_flag:
error('exiting due to one or more badly formatted values (see output above)')
# Crosscheck all camps' details using the coordinates (within tolerance) as key.
had_mismatching_values_flag = False
columns = [
'campsite_name',
'campsite_country',
'campsite_city',
'campsite_street',
'campsite_tel',
'campsite_tents_ok',
'campsite_season',
'campsite_url',
]
for column in columns:
debug(5, 'main: checking for mismatches on %s ...' % (column))
sql_statement = 'SELECT c1.input_file || \':\' || c1.input_line || \'/\' || c2.input_file || \':\' || c2.input_line AS location, \'mismatching %s (\' || c1.%s || \'!=\' || c2.%s || \')\' AS error FROM campsites AS c1, campsites AS c2 WHERE abs(c1.campsite_latitude-c2.campsite_latitude) <= %f AND abs(c1.campsite_longitude-c2.campsite_longitude) <= %f AND c1.%s != c2.%s AND c1.%s > c2.%s ORDER BY c1.input_file, c1.input_line, c2.input_file, c2.input_line;' % (column, column, column, tolerance, tolerance, column, column, column, column)
debug(5, 'main: sql_statement: %s' % (sql_statement))
for tuple in sqlite_exec_wrapper(sql_statement):
(location, mismatch_description) = tuple
sys.stderr.write('%-40s %s\n' % (location, mismatch_description))
had_mismatching_values_flag = True
# On error skip remaining checks.
if exit_on_mismatch_flag and had_mismatching_values_flag:
error('exiting due to one or more mismatching values (see output above)')
# Output data in preferred format.
if output_fmt == 'gpx':
def prologue():
print('')
print('
waypt | name | addr | tel |
---|---|---|---|
%s | %s | %s | %s |
Last updated: %s
' % (time.ctime())) print('') elif output_fmt == 'kml': def prologue(): print('') print('