#!/usr/bin/python3
# -*- coding: utf-8 -*-
# Modules
import sys
import os
import sqlite3
import csv
import re
import syslog
# Configurable stuff
# Other globals
progname = sys.argv[0].rpartition("/")[2]
del sys.argv[0]
syslog_opened_flag = False
DEVNULL = open(os.devnull, 'wb')
db_file = None
#db_file = '%s.sqlite' % (progname)
use_syslog_flag = False
waypoint_name_regex = '^camp(?:[0-9]{2,}|XXX)[*+]{0,2}$'
float_regex = '^[-+]?[0-9]+(?:\.[0-9]+)?$'
campsite_longitude_regex = float_regex
campsite_latitude_regex = float_regex
campsite_country_regex = '^(?:AT|CZ|CH|DE|SK|DK|HU|SL|HR|PL)$'
letters = 'a-zA-ZáéřýőíüÉóéÖúžěöčľÜßČšŽŠäåňÅůŘřøæØťÍďÚ´ôżęłŻĺňűŐ'
campsite_name_regex = '^(?:[-%s 0-9/.()&+:"\']{3,}|[-?])$' % (letters)
campsite_city_regex = '^(?:[-%s 0-9/.()]{3,}|[-?])$' % (letters)
campsite_street_regex = '^(?:[-%s 0-9/.&]{5,}|[1-9][/0-9]*|[-?])$' % (letters)
tel_regex = '\+[1-9][0-9]{1,2} [1-9][0-9 ]*[0-9]'
campsite_tel_regex = '^(?:%s(?: or %s)*|\?)$' % (tel_regex, tel_regex)
campsite_tents_ok_regex = '^(?:yes|no|\?)$'
campsite_season_regex = '^((|mid)\d+|\?)-((|mid)\d+|\?)(|,((|mid)\d+|\?)-((|mid)\d+|\?))$'
campsite_season_regex = '^(?:(?:|mid)\d+|\?)-(?:(?:|mid)\d+|\?)$'
campsite_url_regex = '^(https?://[-a-zäöüA-Z0-9/.?&=_%,#!:@+]+|\?)$'
# Classes
# Functions
def main():
global verboselevel, DEVNULL, cursor, db_file
# Defaults for options
verboselevel = 2
output_fmt = None
# Process options, old school
while True:
if len(sys.argv) == 0:
break
# Application-specific options
elif sys.argv[0].startswith('--output-format='):
output_fmt = sys.argv[0][len('--output-format='):]
if output_fmt not in ['csv', 'gpx', 'html']:
usage()
# '-' is an argument not an option
elif sys.argv[0] == '-':
break
# Standard options
elif sys.argv[0].startswith('--debug='):
verboselevel = sys.argv[0][len('--debug='):]
if not re.search('^\d+$', verboselevel):
usage()
verboselevel = int(verboselevel)
elif sys.argv[0] == '--verbose':
verboselevel = 3
elif sys.argv[0] == '--help':
usage(0)
elif sys.argv[0] == '-v':
verboselevel = 3
elif sys.argv[0] == '-d':
if len(sys.argv) < 3:
usage()
verboselevel = sys.argv[1]
del sys.argv[0]
if not re.search('^\d+$', verboselevel):
usage()
verboselevel = int(verboselevel)
elif sys.argv[0] == '-h':
usage(0)
elif sys.argv[0] == '--':
del sys.argv[0]
break
elif sys.argv[0].startswith('-'):
usage()
else:
break
del sys.argv[0]
# Process arguments
input_files = sys.argv if len(sys.argv) > 0 else ['-']
debug(10, 'main: input_files=%s' % (input_files))
# Sanity checks and derivations
if output_fmt is None:
usage()
if db_file is None:
database_exists_flag = False
elif os.path.isfile(db_file):
database_exists_flag = True
else:
database_exists_flag = False
# Guts
# Connect to database
sqlite3.enable_callback_tracebacks(True)
conn = sqlite3.connect(db_file if db_file is not None else ':memory:')
# I want to control transactions myself
conn.isolation_level = None
# Create REGEXP function. (REGEXP operator maps to REGEXP function automatically)
def regexp(expr, item):
#debug(10, 'regexp: expr=%s, item=%s' % (expr, item))
reg = re.compile(expr)
return reg.search(item) is not None
conn.create_function("REGEXP", 2, regexp)
cursor = conn.cursor()
# Intialise database if necessary
if not database_exists_flag:
sqlite_exec_wrapper('CREATE TABLE campsites (input_file char, input_line int, waypoint_name char, campsite_latitude char, campsite_longitude char, campsite_name char, campsite_country char, campsite_city char, campsite_street char, campsite_tel char, campsite_tents_ok char, campsite_season char, campsite_url, PRIMARY KEY(input_file, input_line));')
# Normally I'd create a view to get the deduplicated list of campsites, but
# views don't have rowids and I want to use rowid to build the waypoint name.
# So we create a second copy of the table and we'll insert the deduplicated
# rows into into it with a single INSERT statement.
sqlite_exec_wrapper('CREATE TABLE deduplicated_campsites (campsite_latitude char, campsite_longitude char, campsite_name char, campsite_country char, campsite_city char, campsite_street char, campsite_tel char, campsite_tents_ok char, campsite_season char,campsite_url char);')
# Slurp input with filenames and line numbers (useful for error messages)
sqlite_exec_wrapper('BEGIN TRANSACTION;')
for input_file in input_files:
if input_file == '-':
fh = sys.stdin
else:
fh = open(input_file, 'r')
input_line = 0
for row in csv.reader(fh):
input_line += 1
debug(10, 'main: file=%s, line=%d' % (input_file, input_line))
(waypt_name, campsite_latitude, campsite_longitude, campsite_name, campsite_country, campsite_city, campsite_street, campsite_tel, campsite_tents_ok, campsite_season, campsite_url) = row[5:-1]
# Ignore all non-camp rows
if not 'camp' in waypt_name:
continue
# Ignore knowingly duplicated camps.
if 'see above' in campsite_name:
continue
debug(10, 'main: waypt_name=%s, campsite_latitude=%s, campsite_longitude=%s, campsite_name=%s, campsite_country=%s, campsite_city=%s, campsite_street=%s, campsite_tel=%s, campsite_tents_ok=%s, campsite_season=%s, campsite_url=%s' % (waypt_name, campsite_latitude, campsite_longitude, campsite_name, campsite_country, campsite_city, campsite_street, campsite_tel, campsite_tents_ok, campsite_season, campsite_url))
sqlite_exec_wrapper('INSERT INTO campsites VALUES (\'%s\', %d, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s);' % (input_file, input_line, sql_stringify(waypt_name), sql_stringify(campsite_latitude), sql_stringify(campsite_longitude), sql_stringify(campsite_name), sql_stringify(campsite_country), sql_stringify(campsite_city), sql_stringify(campsite_street), sql_stringify(campsite_tel), sql_stringify(campsite_tents_ok), sql_stringify(campsite_season), sql_stringify(campsite_url)))
if input_file == '-':
fh.close()
sqlite_exec_wrapper('END TRANSACTION;')
# Report badly formatted values.
check_param_sets = [
{ 'col':'waypoint_name', 'regex':waypoint_name_regex },
{ 'col':'campsite_latitude', 'regex':campsite_latitude_regex },
{ 'col':'campsite_longitude', 'regex':campsite_longitude_regex },
{ 'col':'campsite_name', 'regex':campsite_name_regex },
{ 'col':'campsite_country', 'regex':campsite_country_regex },
{ 'col':'campsite_city', 'regex':campsite_city_regex },
{ 'col':'campsite_street', 'regex':campsite_street_regex },
{ 'col':'campsite_tel', 'regex':campsite_tel_regex },
{ 'col':'campsite_tents_ok', 'regex':campsite_tents_ok_regex },
{ 'col':'campsite_season', 'regex':campsite_season_regex },
{ 'col':'campsite_url', 'regex':campsite_url_regex },
]
for check_param_set in check_param_sets:
sql_statement = 'SELECT input_file, input_line as error FROM campsites WHERE %s IS NULL ORDER BY input_file, input_line;' % (check_param_set['col'])
# Check first for NULL (else REGEXP bauks)
for tuple in sqlite_exec_wrapper(sql_statement):
(input_file, input_line) = tuple
sys.stderr.write('%-20s %s\n' % ('%s:%s:' % (input_file, input_line), 'empty %s' % (check_param_set['col'])))
sql_statement = 'SELECT input_file, input_line, %s as whatever FROM campsites WHERE %s IS NOT NULL AND NOT %s REGEXP %s ORDER BY input_file, input_line;' % (check_param_set['col'], check_param_set['col'],check_param_set['col'], sql_stringify(check_param_set['regex']))
# Check second value is formatted correctly.
for tuple in sqlite_exec_wrapper(sql_statement):
(input_file, input_line, whatever) = tuple
sys.stderr.write('%-20s %s\n' % ('%s:%d:' % (input_file, input_line), '%s: bad %s' % (whatever, check_param_set['col'])))
# Report mismatching values in lines occurring multiple times.
columns = [
'campsite_name',
'campsite_country',
'campsite_city',
'campsite_street',
'campsite_tel',
'campsite_tents_ok',
'campsite_season',
'campsite_url',
]
for column in columns:
# Cross the table with itself and look for unequal fields. Use
# 'c1.whatever > c2.whatever' to ignore one of the two occurences
# of a mismatch this join would otherwise produce.
sql_statement = 'SELECT c1.input_file || \':\' || c1.input_line || \'/\' || c2.input_file || \':\' || c2.input_line AS location, \'mismatching %s (\' || c1.%s || \'!=\' || c2.%s || \')\' AS error FROM campsites AS c1, campsites AS c2 WHERE c1.campsite_latitude == c2.campsite_latitude AND c1.campsite_longitude == c2.campsite_longitude AND c1.%s != c2.%s AND c1.%s > c2.%s;' % (column, column, column, column, column, column, column)
for tuple in sqlite_exec_wrapper(sql_statement):
(location, error) = tuple
sys.stderr.write('%-40s %s\n' % (location, error))
# Deduplicate in a way that gives a rowid.
sqlite_exec_wrapper('INSERT INTO deduplicated_campsites SELECT DISTINCT campsite_latitude, campsite_longitude, campsite_name, campsite_country, campsite_city, campsite_street, campsite_tel, campsite_tents_ok, campsite_season, campsite_url FROM campsites ORDER BY campsite_latitude;')
# Whatever format we output in, we use use the same ordering and
# and selection in order to ensure that the rowid values correspond
# across the differing output files.
if output_fmt == 'gpx':
def prologue():
print('')
print('
(GM, OSM, BM, geo)' % (campsite_latitude, campsite_longitude, campsite_latitude, campsite_longitude, campsite_latitude, campsite_longitude, campsite_latitude, campsite_longitude, campsite_latitude, campsite_longitude),
'%s' % (campsite_url, campsite_name) if campsite_url.startswith('http') else campsite_name,
campsite_country,
campsite_city,
campsite_street,
' or '.join(['%s' % (tel, tel) for tel in campsite_tel.split(' or ') if tel not in ['?','-']]),
campsite_tents_ok,
campsite_season])
def epilogue():
pass
elif output_fmt == 'html':
def prologue():
print('')
print('
waypt | name | addr | tel |
---|---|---|---|
%s | %s | %s | %s |