#!/usr/bin/python3 # -*- coding: utf-8 -*- # Modules import sys import os import sqlite3 import csv import re import syslog # Configurable stuff # Other globals progname = sys.argv[0].rpartition("/")[2] del sys.argv[0] syslog_opened_flag = False DEVNULL = open(os.devnull, 'wb') db_file = None #db_file = '%s.sqlite' % (progname) use_syslog_flag = False waypoint_name_regex = '^camp(?:[0-9]{2,}|XXX)[*+]{0,2}$' float_regex = '^[-+]?[0-9]+(?:\.[0-9]+)?$' campsite_longitude_regex = float_regex campsite_latitude_regex = float_regex campsite_country_regex = '^(?:AT|CZ|CH|DE|SK|DK|HU|SL|HR|PL)$' letters = 'a-zA-ZáéřýőíüÉóéÖúžěöčľÜßČšŽŠäåňÅůŘřøæØťÍďÚ´ôżęłŻĺňűŐ' campsite_name_regex = '^(?:[-%s 0-9/.()&+:"\']{3,}|[-?])$' % (letters) campsite_city_regex = '^(?:[-%s 0-9/.()]{3,}|[-?])$' % (letters) campsite_street_regex = '^(?:[-%s 0-9/.&]{5,}|[1-9][/0-9]*|[-?])$' % (letters) tel_regex = '\+[1-9][0-9]{1,2} [1-9][0-9 ]*[0-9]' campsite_tel_regex = '^(?:%s(?: or %s)*|\?)$' % (tel_regex, tel_regex) campsite_tents_ok_regex = '^(?:yes|no|\?)$' campsite_season_regex = '^((|mid)\d+|\?)-((|mid)\d+|\?)(|,((|mid)\d+|\?)-((|mid)\d+|\?))$' campsite_season_regex = '^(?:(?:|mid)\d+|\?)-(?:(?:|mid)\d+|\?)$' campsite_url_regex = '^(https?://[-a-zäöüA-Z0-9/.?&=_%,#!:@+]+|\?)$' # Classes # Functions def main(): global verboselevel, DEVNULL, cursor, db_file # Defaults for options verboselevel = 2 output_fmt = None # Process options, old school while True: if len(sys.argv) == 0: break # Application-specific options elif sys.argv[0].startswith('--output-format='): output_fmt = sys.argv[0][len('--output-format='):] if output_fmt not in ['csv', 'gpx', 'html']: usage() # '-' is an argument not an option elif sys.argv[0] == '-': break # Standard options elif sys.argv[0].startswith('--debug='): verboselevel = sys.argv[0][len('--debug='):] if not re.search('^\d+$', verboselevel): usage() verboselevel = int(verboselevel) elif sys.argv[0] == '--verbose': verboselevel = 3 elif sys.argv[0] == '--help': usage(0) elif sys.argv[0] == '-v': verboselevel = 3 elif sys.argv[0] == '-d': if len(sys.argv) < 3: usage() verboselevel = sys.argv[1] del sys.argv[0] if not re.search('^\d+$', verboselevel): usage() verboselevel = int(verboselevel) elif sys.argv[0] == '-h': usage(0) elif sys.argv[0] == '--': del sys.argv[0] break elif sys.argv[0].startswith('-'): usage() else: break del sys.argv[0] # Process arguments input_files = sys.argv if len(sys.argv) > 0 else ['-'] debug(10, 'main: input_files=%s' % (input_files)) # Sanity checks and derivations if output_fmt is None: usage() if db_file is None: database_exists_flag = False elif os.path.isfile(db_file): database_exists_flag = True else: database_exists_flag = False # Guts # Connect to database sqlite3.enable_callback_tracebacks(True) conn = sqlite3.connect(db_file if db_file is not None else ':memory:') # I want to control transactions myself conn.isolation_level = None # Create REGEXP function. (REGEXP operator maps to REGEXP function automatically) def regexp(expr, item): #debug(10, 'regexp: expr=%s, item=%s' % (expr, item)) reg = re.compile(expr) return reg.search(item) is not None conn.create_function("REGEXP", 2, regexp) cursor = conn.cursor() # Intialise database if necessary if not database_exists_flag: sqlite_exec_wrapper('CREATE TABLE campsites (input_file char, input_line int, waypoint_name char, campsite_latitude char, campsite_longitude char, campsite_name char, campsite_country char, campsite_city char, campsite_street char, campsite_tel char, campsite_tents_ok char, campsite_season char, campsite_url, PRIMARY KEY(input_file, input_line));') # Normally I'd create a view to get the deduplicated list of campsites, but # views don't have rowids and I want to use rowid to build the waypoint name. # So we create a second copy of the table and we'll insert the deduplicated # rows into into it with a single INSERT statement. sqlite_exec_wrapper('CREATE TABLE deduplicated_campsites (campsite_latitude char, campsite_longitude char, campsite_name char, campsite_country char, campsite_city char, campsite_street char, campsite_tel char, campsite_tents_ok char, campsite_season char,campsite_url char);') # Slurp input with filenames and line numbers (useful for error messages) sqlite_exec_wrapper('BEGIN TRANSACTION;') for input_file in input_files: if input_file == '-': fh = sys.stdin else: fh = open(input_file, 'r') input_line = 0 for row in csv.reader(fh): input_line += 1 debug(10, 'main: file=%s, line=%d' % (input_file, input_line)) (waypt_name, campsite_latitude, campsite_longitude, campsite_name, campsite_country, campsite_city, campsite_street, campsite_tel, campsite_tents_ok, campsite_season, campsite_url) = row[5:-1] # Ignore all non-camp rows if not 'camp' in waypt_name: continue # Ignore knowingly duplicated camps. if 'see above' in campsite_name: continue debug(10, 'main: waypt_name=%s, campsite_latitude=%s, campsite_longitude=%s, campsite_name=%s, campsite_country=%s, campsite_city=%s, campsite_street=%s, campsite_tel=%s, campsite_tents_ok=%s, campsite_season=%s, campsite_url=%s' % (waypt_name, campsite_latitude, campsite_longitude, campsite_name, campsite_country, campsite_city, campsite_street, campsite_tel, campsite_tents_ok, campsite_season, campsite_url)) sqlite_exec_wrapper('INSERT INTO campsites VALUES (\'%s\', %d, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s);' % (input_file, input_line, sql_stringify(waypt_name), sql_stringify(campsite_latitude), sql_stringify(campsite_longitude), sql_stringify(campsite_name), sql_stringify(campsite_country), sql_stringify(campsite_city), sql_stringify(campsite_street), sql_stringify(campsite_tel), sql_stringify(campsite_tents_ok), sql_stringify(campsite_season), sql_stringify(campsite_url))) if input_file == '-': fh.close() sqlite_exec_wrapper('END TRANSACTION;') # Report badly formatted values. check_param_sets = [ { 'col':'waypoint_name', 'regex':waypoint_name_regex }, { 'col':'campsite_latitude', 'regex':campsite_latitude_regex }, { 'col':'campsite_longitude', 'regex':campsite_longitude_regex }, { 'col':'campsite_name', 'regex':campsite_name_regex }, { 'col':'campsite_country', 'regex':campsite_country_regex }, { 'col':'campsite_city', 'regex':campsite_city_regex }, { 'col':'campsite_street', 'regex':campsite_street_regex }, { 'col':'campsite_tel', 'regex':campsite_tel_regex }, { 'col':'campsite_tents_ok', 'regex':campsite_tents_ok_regex }, { 'col':'campsite_season', 'regex':campsite_season_regex }, { 'col':'campsite_url', 'regex':campsite_url_regex }, ] for check_param_set in check_param_sets: sql_statement = 'SELECT input_file, input_line as error FROM campsites WHERE %s IS NULL ORDER BY input_file, input_line;' % (check_param_set['col']) # Check first for NULL (else REGEXP bauks) for tuple in sqlite_exec_wrapper(sql_statement): (input_file, input_line) = tuple sys.stderr.write('%-20s %s\n' % ('%s:%s:' % (input_file, input_line), 'empty %s' % (check_param_set['col']))) sql_statement = 'SELECT input_file, input_line, %s as whatever FROM campsites WHERE %s IS NOT NULL AND NOT %s REGEXP %s ORDER BY input_file, input_line;' % (check_param_set['col'], check_param_set['col'],check_param_set['col'], sql_stringify(check_param_set['regex'])) # Check second value is formatted correctly. for tuple in sqlite_exec_wrapper(sql_statement): (input_file, input_line, whatever) = tuple sys.stderr.write('%-20s %s\n' % ('%s:%d:' % (input_file, input_line), '%s: bad %s' % (whatever, check_param_set['col']))) # Report mismatching values in lines occurring multiple times. columns = [ 'campsite_name', 'campsite_country', 'campsite_city', 'campsite_street', 'campsite_tel', 'campsite_tents_ok', 'campsite_season', 'campsite_url', ] for column in columns: # Cross the table with itself and look for unequal fields. Use # 'c1.whatever > c2.whatever' to ignore one of the two occurences # of a mismatch this join would otherwise produce. sql_statement = 'SELECT c1.input_file || \':\' || c1.input_line || \'/\' || c2.input_file || \':\' || c2.input_line AS location, \'mismatching %s (\' || c1.%s || \'!=\' || c2.%s || \')\' AS error FROM campsites AS c1, campsites AS c2 WHERE c1.campsite_latitude == c2.campsite_latitude AND c1.campsite_longitude == c2.campsite_longitude AND c1.%s != c2.%s AND c1.%s > c2.%s;' % (column, column, column, column, column, column, column) for tuple in sqlite_exec_wrapper(sql_statement): (location, error) = tuple sys.stderr.write('%-40s %s\n' % (location, error)) # Deduplicate in a way that gives a rowid. sqlite_exec_wrapper('INSERT INTO deduplicated_campsites SELECT DISTINCT campsite_latitude, campsite_longitude, campsite_name, campsite_country, campsite_city, campsite_street, campsite_tel, campsite_tents_ok, campsite_season, campsite_url FROM campsites ORDER BY campsite_latitude;') # Whatever format we output in, we use use the same ordering and # and selection in order to ensure that the rowid values correspond # across the differing output files. if output_fmt == 'gpx': def prologue(): print('') print('') sql_statement = ''' SELECT campsite_latitude, campsite_longitude, waypoint_name, campsite_url FROM campsites ORDER BY waypoint_name; ''' def process_row(tuple): (campsite_latitude, campsite_longitude, waypoint_name, campsite_url) = tuple print(' ' % (campsite_latitude, campsite_longitude)) print(' %s' % (waypoint_name)) if campsite_url.startswith('http'): # Gorp will load URLs encoded with this: # # # # but MapSource won't. MapSource uses GPX 1.0 standard but # even that's specification includes ... so # I'm not sure why it does not work. If I load a GPX file # without links into MapSource, add a link to a camp, # save it out then it becomes clear that MapSource will # # # # So we use that, which is supported by GPX 1.1 (which GORP # writes) but according to the official docs not by 1.0 # (https://www.topografix.com/gpx_manual.asp)! # # Also note that MapSource is sensitive to the *order*: # it will take name/link/sym but not name/sym/link! # # Ha! But in a MapSource still has an allergy # to ampersands! They can't be escaped because escaping is # for *values* in the query string (e.g. q=M&Ms) , not for # the separators (e.g. q=M%26Ms&filetype=pdf). # # But after all that research, and specifically discovering # that order is important, it seems that MapSource *does* # accept but still with the same don't-like-amersand # problem. And we can't escape the ampersands. But can we do # it with CDATA protection! Finally! print(' ' % (campsite_url)) print(' Triangle, Green') print(' ') def epilogue(): print('') elif output_fmt == 'csv': def prologue(): global spamwriter spamwriter = csv.writer(sys.stdout, delimiter=',', quotechar='"', quoting=csv.QUOTE_NONNUMERIC) spamwriter.writerow(["marker","lat/lon","camp name & homepage","ctry","town","street","telephone","tents ok?","season"]) sql_statement = ''' SELECT rowid, campsite_latitude, campsite_longitude, campsite_name, campsite_url, campsite_country, campsite_city, campsite_street, campsite_tel, campsite_tents_ok, campsite_season FROM deduplicated_campsites; ''' def process_row(tuple): global spamwriter (rowid, campsite_latitude, campsite_longitude, campsite_name, campsite_url, campsite_country, campsite_city, campsite_street, campsite_tel, campsite_tents_ok, campsite_season) = tuple spamwriter.writerow([ 'camp%04d' % (rowid), '%s,%s
(GM, OSM, BM, geo)' % (campsite_latitude, campsite_longitude, campsite_latitude, campsite_longitude, campsite_latitude, campsite_longitude, campsite_latitude, campsite_longitude, campsite_latitude, campsite_longitude), '%s' % (campsite_url, campsite_name) if campsite_url.startswith('http') else campsite_name, campsite_country, campsite_city, campsite_street, ' or '.join(['%s' % (tel, tel) for tel in campsite_tel.split(' or ') if tel not in ['?','-']]), campsite_tents_ok, campsite_season]) def epilogue(): pass elif output_fmt == 'html': def prologue(): print('') print('') print('') print('camps') print('') print('') print('') print(' ') print(' ') sql_statement = ''' SELECT waypoint_name, campsite_name, campsite_city, campsite_street, campsite_url, campsite_latitude, campsite_longitude, campsite_tel FROM campsites ORDER BY waypoint_name; ''' def process_row(tuple): (waypoint_name, campsite_name, campsite_city, campsite_street, campsite_url, campsite_latitude, campsite_longitude, campsite_tel) = tuple print(' ' % ( waypoint_name, '%s' % (campsite_url, campsite_name) if campsite_url.startswith('http') else campsite_name, campsite_latitude, campsite_longitude, ', '.join([address_component for address_component in [campsite_street, campsite_city] if address_component not in ['?','-']]), ' or '.join(['%s' % (tel, tel) for tel in campsite_tel.split(' or ') if tel not in ['?','-']]) )) def epilogue(): print('
wayptnameaddrtel
%s%s%s%s
') print('') print('') # Generate report prologue() for tuple in sqlite_exec_wrapper(sql_statement): process_row(tuple) epilogue() # SQL functions def sqlite_exec_wrapper(statement): global cursor return cursor.execute(statement) def sql_stringify(s): return 'NULL' if s == '' else '\'%s\''%(s.replace('\'', '\'\'')) # Messaging functions def usage(rc=1): global progname fh = sys.stdout if rc == 0 else sys.stderr fh.write('Usage: %s [ ] --output format= [ ... ]\n' % progname) fh.write('\n') fh.write('Formats: gpx: suitable only for single-holiday GPX files\n') fh.write(' html: suitable only for single-holiday HTML files\n') fh.write(' csv: intermediate filetype used by Makefiles\n') sys.exit(rc) def debug(level, text): global verboselevel if verboselevel < level: return msg_writer("DEBUG[%s]: %s" % (level, text), syslog.LOG_DEBUG) def info(text): global verboselevel if verboselevel < 3: return msg_writer("INFO: %s" % (text), syslog.LOG_INFO) def warning(text): global verboselevel if verboselevel < 2: return msg_writer("WARNING: %s" % (text), syslog.LOG_WARNING) def error(text): global errors_bubble_up_flag msg_writer("ERROR: %s" % (text), syslog.LOG_ERR) if not errors_bubble_up_flag: sys.exit(1) def internal(text): msg_writer("INTERNAL ERROR: %s" % (text), syslog.LOG_CRIT) sys.exit(2) def msg_writer(text, syslog_level): global use_syslog_flag msg_writer_stderr(text) if use_syslog_flag and not sys.stderr.isatty(): msg_writer_syslog(text, syslog_level) def msg_writer_stderr(text): global progname sys.stderr.write("%s: %s\n" % (progname, text)) def msg_writer_syslog(text, syslog_level): global syslog_opened_flag, progname if not syslog_opened_flag: if sys.version_info < (2, 7): syslog.openlog(progname, syslog.LOG_PID) else: syslog.openlog(progname, logoption=syslog.LOG_PID) syslog_opened_flag = True syslog.syslog((syslog.LOG_LOCAL0|syslog_level), text) # Entry point if __name__=="__main__": main()