#!/usr/bin/python3
# -*- coding: utf-8 -*-

app_svnid = '$HeadURL$ $LastChangedRevision$'
import sys
import subprocess
#  In python3 the output captured from subprocess.Popen() is a byte sequence by default, so to append the
#  string '/include' we need to convert one to the other. sys.path.append() expects a string, not a byte
#  sequence. Therefore it makes most sense either to convert the captured output to a string or to get
#  Popen() to return a string directly. The latter can be done by use of 'universal_newlines=True'.
sys.path.append(subprocess.Popen(["ade-config", "ade_share_prefix"], stdout=subprocess.PIPE, universal_newlines=True).communicate()[0][:-1] + '/include')
import ade
import os
import re


#  Application-specific error definitions, registered in ademan2html() via
#  ade.ade_err_registerdefderrs(). Each entry maps an error name to a dict
#  holding its message format string under "fmt".
ademan2html_defined_errors  = {
    "ademan2html_err_misc":{"fmt":"%s"},
}

#  Instantiate option variables

#  Other globals
#  Matches http://, https:// and mailto: URLs. A mailto URI has no '//' after
#  the scheme (e.g. mailto:joe@example.com), so the '//' is optional there;
#  the historical 'mailto://' spelling is still accepted.
url_regex = r'(?:mailto:(?://)?|https?://)(?:www\.)?[-a-zA-Z0-9@:%._\+~#=]{2,256}\.[a-z]{2,6}\b(?:[-a-zA-Z0-9@:%_\+.~#?&//=]*)'
#  Colon-separated list of directories not already in $PATH that might contain zsoelim command.
zsoelim_path = '/usr/lib/man-db:/usr/libexec/man-db'

def ademan2html(errstack):
    """Application entry point: set up options and handlers, then convert the input.

    Registers the application's errors, the --full option and the
    usage/version/listpaths callbacks with ade, processes the command line
    and then runs process_handle() on either the named file or standard
    input, writing the HTML to standard output. A file opened here is
    closed again before returning.

    Returns ade.ade_err_ok on success, otherwise the failing return code.
    """
    global opt_full

    #  Register application-specific errors
    ade.ade_err_registerdefderrs(ademan2html_defined_errors)

    #  Defaults for options
    opt_full = False

    #  Register options
    ade.ade_err_debug(errstack, 10, "ademan2html: registering options ...")
    rc = ade.ade_opt_register(errstack, "", "full", globals(), 'ademan2html_opt_handler_%s')
    if rc != ade.ade_err_ok:
        return rc

    #  Register handler functions
    ade.ade_err_debug(errstack, 10, "ademan2html: registering message handlers ...")
    rc = ade.ade_msg_register(errstack, ademan2html_usage, ademan2html_version, ademan2html_listpaths)
    if rc != ade.ade_err_ok:
        return rc

    #  Process options
    ade.ade_err_debug(errstack, 10, "ademan2html: processing options ...")
    rc = ade.ade_opt_process(errstack)
    if rc != ade.ade_err_ok:
        return rc

    #  Process arguments.
    #  NOTE(review): this treats sys.argv as holding only the remaining
    #  non-option arguments (argv[0] is the input file), so presumably
    #  ade_opt_process() has already stripped the program name and options,
    #  and ade_msg_usage() presumably does not return - confirm against ade.
    if len(sys.argv) not in [0, 1]:
        ade.ade_msg_usage(errstack)
    if len(sys.argv) == 0:
        #  No file named: read stdin, which we do not own and must not close.
        return process_handle(errstack, sys.stdin, sys.stdout, '<stdin>', opt_full)

    filename = sys.argv[0]
    try:
        in_handle = open(filename, 'r')
    except OSError:
        #  Only genuine open failures are reported here; a bare 'except' would
        #  also swallow KeyboardInterrupt and SystemExit.
        ade.ade_err_error(errstack, 'ademan2html_err_misc', '%s: failed to open' % (filename))
        return ade.ade_err_fail

    #  Guts ('with' guarantees the file is closed whatever process_handle does)
    with in_handle:
        rc = process_handle(errstack, in_handle, sys.stdout, filename, opt_full)
    if rc != ade.ade_err_ok:
        return rc

    return ade.ade_err_ok

def ademan2html_version(errstack):
    """Version callback passed to ade.ade_msg_register().

    Delegates to ade.ade_smf_extractversionfromsvnstring() with the module's
    app_svnid keyword string and returns whatever that call returns.
    """
    svn_keywords = app_svnid
    return ade.ade_smf_extractversionfromsvnstring(errstack, svn_keywords)

def ademan2html_listpaths(errstack):
    """List-paths callback passed to ade.ade_msg_register().

    This application has nothing to list, so it always returns the pair
    (ade.ade_err_ok, None).
    """
    status = ade.ade_err_ok
    paths = None
    return status, paths

def ademan2html_usage(errstack, passno):
    """Usage callback passed to ade.ade_msg_register().

    Returns (return code, text). Pass 1 yields the argument synopsis and
    pass 2 yields the description of the --full option. Any other pass
    number is reported through ade.ade_err_internal() and gives
    (ade.ade_err_fail, None).
    """
    if passno == 1:
        text = '[ <file> ]'
    elif passno == 2:
        text = '                     --full                  write HTML prologue and epilogue'
    else:
        ade.ade_err_internal(errstack, 'ademan2html_usage: %d: bad passnumber' % (passno))
        #  Should ade_err_internal() return rather than abort, fail cleanly
        #  instead of reaching the final return with 'text' unbound.
        return ade.ade_err_fail, None
    return ade.ade_err_ok, text

def process_handle(errstack, in_handle, out_handle, filename, full_flag):
    """Format the man page read from in_handle and write it as HTML to out_handle.

    The input is piped through 'zsoelim | nroff -man'; each resulting line
    is converted by process_line() and written inside a <pre> element.
    When full_flag is true the output is additionally wrapped in an HTML
    prologue and epilogue. filename is only passed on to process_line().

    Returns ade.ade_err_ok, or the first failing return code.
    """
    if full_flag:
        for prologue_line in (
            '<!DOCTYPE HTML PUBLIC "-//W3C//DTD HTML 4.01//EN" "http://www.w3.org/TR/html4/strict.dtd">\n',
            '<html>\n',
            '<body>\n',
        ):
            out_handle.write(prologue_line)

    #  Only the return code of the header generator is of interest; the second
    #  element of the returned pair is discarded.
    rc, _unused = ade.ade_spc_autogendheader(errstack, out_handle, '<!--', '\n', '    ', '\n', '    ', '-->')
    if rc != ade.ade_err_ok:
        return rc
    out_handle.write('<pre>')

    #  nroff on its own is not intelligent enough to find .so'd files.
    #  The man command can do it but then there's no way to prevent
    #  it stripping bold/underline control characters when it's writing
    #  to a non-tty. The solution is that we call zsoelim ourselves.
    #  The pipeline's environment holds nothing but PATH, extended with the
    #  directories that might contain zsoelim.
    search_path = os.pathsep.join([os.environ['PATH']] + zsoelim_path.split(os.pathsep))
    pipeline = subprocess.Popen(
        'zsoelim | nroff -man',
        env={'PATH': search_path},
        stdin=in_handle,
        stdout=subprocess.PIPE,
        universal_newlines=True,
        shell=True,
    )
    formatted, _stderr = pipeline.communicate()

    for line in formatted.split('\n'):
        ade.ade_err_debug(errstack, 10, 'process_handle: line=[%s]' % (line))
        rc, outline = process_line(errstack, line, filename)
        if rc != ade.ade_err_ok:
            return rc
        out_handle.write(outline + '\n')

    out_handle.write('</pre>\n')
    if full_flag:
        for epilogue_line in ('</body>\n', '</html>\n'):
            out_handle.write(epilogue_line)
    return ade.ade_err_ok

def process_line(errstack, inline, filename):
    """Convert one line of nroff output into HTML.

    nroff marks bold as 'c BS c' and underline as '_ BS c', where BS is the
    backspace character (ctrl-H, 0x08). The line is consumed from the left:
    runs of spaces are copied through unchanged, bold runs are wrapped in
    <b>, underlined runs in <i>, and everything else is emitted as normal
    text. Every text run goes through fix_gtlt_and_urls(). filename is
    accepted for interface compatibility and is not used.

    Returns (ade.ade_err_ok, html) on success, or (rc, None) if a helper
    fails, so the caller can always unpack two values.
    """
    #  The backspace is spelt as the explicit escape \x08 rather than as a
    #  literal control character, which is invisible and easily lost. (\b
    #  cannot be used: in a regex it means "word boundary".)
    pieces = []
    state = 'n'
    while inline:
        #  Leading spaces: copied as-is, without changing the current state,
        #  so a space inside a bold/italic phrase does not close the tag.
        m = re.match(r' +', inline)
        if m:
            pieces.append(m.group(0))
            inline = inline[m.end():]
            continue

        #  Check for bold at beginning: groups of 'c BS c'; keep the 1st of every 3.
        m = re.match(r'(?:(.)\x08\1)+', inline)
        if m:
            new_state = 'b'
            consumed = m.group(0)
            text = consumed[0::3]
        else:
            #  Check for italic at beginning: groups of '_ BS c'; keep the 3rd of every 3.
            m = re.match(r'(?:_\x08.)+', inline)
            if m:
                new_state = 'i'
                consumed = m.group(0)
                text = consumed[2::3]
            else:
                #  Other (not spaces and not something-immediately-followed-by-ctrl-h)
                m = re.match(r'(.+?)(?: |.\x08|$)', inline)
                #  Fallback: if nothing matches (e.g. an embedded newline, which
                #  '.' does not match) consume one character so the loop
                #  always makes progress.
                consumed = m.group(1) if m else inline[0]
                new_state = 'n'
                text = consumed

        rc, html, state = switch_if_needed(errstack, new_state, state)
        if rc != ade.ade_err_ok:
            return rc, None
        pieces.append(html)
        rc, fixed = fix_gtlt_and_urls(errstack, text)
        if rc != ade.ade_err_ok:
            return rc, None
        pieces.append(fixed)
        inline = inline[len(consumed):]

    #  Close any tag still open at the end of the line.
    rc, html, state = switch_if_needed(errstack, 'n', state)
    if rc != ade.ade_err_ok:
        return rc, None
    pieces.append(html)

    return ade.ade_err_ok, ''.join(pieces)

def fix_gtlt_and_urls(errstack, s):
    """Escape HTML special characters in s and turn URLs into links.

    URLs are located with the module-level url_regex in the *unescaped*
    text and only then is each piece escaped. Escaping first would let the
    '&gt' of an escaped '>' be absorbed into a preceding URL (as in
    '<http://example.org>'), because '&', 'g' and 't' are all valid URL
    characters for url_regex. '&' is escaped as well as '<' and '>', so
    literal ampersands and entity-like text come out as valid HTML.

    Returns (ade.ade_err_ok, converted string).
    """
    def escape(text):
        #  '&' must be replaced first so the entities added below stay intact.
        return text.replace('&', '&amp;').replace('<', '&lt;').replace('>', '&gt;')

    pieces = []
    pos = 0
    for m in re.finditer(url_regex, s):
        pieces.append(escape(s[pos:m.start()]))
        url = escape(m.group(0))
        pieces.append('<a href="%s">%s</a>' % (url, url))
        pos = m.end()
    pieces.append(escape(s[pos:]))

    return ade.ade_err_ok, ''.join(pieces)

def switch_if_needed(errstack, new_state, old_state):
    """Return the HTML needed to move from old_state to new_state.

    States are 'b' (bold), 'i' (italic) or anything else (no markup). The
    result is (ade.ade_err_ok, html, new_state), where html closes the old
    state's tag, if any, and then opens the new state's tag, if any; it is
    empty when the state does not change.
    """
    if new_state == old_state:
        return ade.ade_err_ok, '', new_state

    #  Close whatever was open ...
    if old_state == 'i':
        closing = '</i>'
    elif old_state == 'b':
        closing = '</b>'
    else:
        closing = ''

    #  ... then open whatever is wanted.
    if new_state == 'i':
        opening = '<i>'
    elif new_state == 'b':
        opening = '<b>'
    else:
        opening = ''

    return ade.ade_err_ok, closing + opening, new_state

def ademan2html_opt_handler_full(errstack):
    """Handler for the 'full' option: switch on the module-level opt_full flag.

    Its name follows the 'ademan2html_opt_handler_%s' template given to
    ade.ade_opt_register() in ademan2html(). Always returns ade.ade_err_ok.
    """
    global opt_full

    opt_full = True
    status = ade.ade_err_ok
    return status

#  Hand the application entry point to the ade framework.
#  NOTE(review): presumably ade_gep_main() builds the error stack, calls
#  ademan2html(errstack) and turns its return code into the exit status -
#  confirm against the ade module.
ade.ade_gep_main(ademan2html)
