#include "ded.h"
#include "match.h"
#include "char.h"

/************************************************************************
 *                                                                      *
 *                      copyright Richard Bornat 1981                   *
 *                                                                      *
 ************************************************************************/


/************************************************************************
 * backtracking pattern matcher, capable of making multi-line matches   *
 *                                                                      *
 * the parameters are as follows -                                      *
 *                                                                      *
 *  re : an RE pattern, built by build_re                               *
 *                                                                      *
 *  line, bufp : the current match position                             *
 *                                                                      *
 *  state : a variable which is                                         *
 *              BEGLINE at start of match or start of line,             *
 *              MIDLINE in the middle of a line,                        *
 *              ENDLINE after recognising end-of-line                   *
 *              and LBREAK when noticing control marks at end-of-line   *
 *                                                                      *
 *  NOTE : multi-line matches carry a run-time overhead,                *
 *      though by providing a special screen buffer I avoid use of the  *
 *      heap. The screen buffer manipulations are efficient and clear   *
 *      (I hope!) and in fact this procedure would be more complicated  *
 *      and less efficient had it used the heap or the stack.           *
 ************************************************************************/

int match(re, line, bufp, state)
register char *re;
int line;
register char *bufp;
int state;
 {  register char c;

#ifdef DBUG
     if (dbug('c'))
       dbugprintf("matching at re %8 line %d, buffer %d, state %d (space %d)",
		re, line, bufp-cachebuf(line), state, ((char *)&state)-curr_break);
#endif

    while (true)
     {  if (*bufp==c_CONTROL && state!=LBREAK && bufp[1]==0)
	 {  if (match(re, line, bufp, LBREAK)) /* try it with the mark */
	      return(true);
	    else        /* pretend it isn't there */
	      goto nxline;
	 }
	else
	switch (*re++)
	 { case p_FIN:
		/* success !!!! */
#ifdef DBUG
		if (dbug('c'))
		  dbugprintf("match found at re %8 line %d, buffer %d, state %d (space %d)",
		re, line, bufp-cachebuf(line), state, ((char *)&state)-curr_break);
#endif
		if (line>mt_line ||
			(line==mt_line &&
			 bufp-cachebuf(line)+MARGIN>mt_col)) return(false);
		else
		 {  g_fline = line;
		    g_bufp = bufp;
		    g_state = state;
		    return(true);
		 }

	    case p_CHAR:
		if (*re++ != *bufp++)
		 { re -= 2; break; }
		else
		 { state = MIDLINE; continue; }

	    /* separator between words ('<space>) */
	    case p_SEP:
		if ((c = *bufp)==c_SPACE || (c==0 && state!=ENDLINE))
		 {  while (*bufp==c_SPACE) bufp++;

		    /* tricky coding to allow for endline
		     * and invisible line-breaks
		     */
		    if (*bufp==0) state = ENDLINE;
		    else state = MIDLINE;

		    if (match(re-1, line, bufp, state)) return(true);
		    else
		    continue;
		 }
		else
		 { re--; break; }

	    case p_ANY:
		if (*bufp++==0)
		 { re--; break; }
		else
		 { state=MIDLINE; continue; }

	    case p_BOL:
		if (state==BEGLINE && bufp==cachebuf(line))
		 { state=MIDLINE; continue; }
		else
		 {  if (*bufp==0 && state!=BEGLINE) state=ENDLINE;
		    re--; break;
		 }

	    case p_nBOL: /* this is needed to implement silly sets */
		if (state==BEGLINE && bufp==cachebuf(line))
		  return(false);
		else continue;

	    case p_EOL:
		/* this code implements multiple end-line
		 * characters rather neatly
		 */
		if (state==ENDLINE)
		 { re--; break; }
		else
		if (*bufp != 0) return(false);
		else
		 { state = ENDLINE; continue; }

	   case p_TWOSTAR:
		/* almost as ONESTAR below, but first
		 * looks for the SHORTEST matching string
		 */
		if ( match(re+*re, line, bufp, state))
				return(true);
		/* else fall through */

	    case p_ONESTAR:
		/* There follows an iterated alternative.
		 * - *re is a jumparound marker
		 * - the alternative starts at re+1
		 * - search is for LONGEST matching string
		 *     with protection against non-moving search
		 */
		if ( match(re+1, line, bufp, state) &&
			moved(line, bufp, state) &&
			match(re-1, g_fline, g_bufp, g_state))
		  return(true);
		else /* ignore iteration */
		 { re += *re; continue; }

	   case p_TWOPLUS:
		/* almost as ONEPLUS below, but first
		 * looks for the SHORTEST matching string
		 */
		if (match(re+1, line, bufp, state))
		 {  if (match(re+*re, g_fline, g_bufp, g_state))
		      return(true);
		    else
		      goto MID_PLUS; /* sorry */
		 }
		else
		 { re--; break; }

	    case p_ONEPLUS:
		/* There follows an iterated alternative, which must
		 * occur at least once.
		 * - see ONESTAR above for explanation of code.
		 */
		if (match(re+1, line, bufp, state))
		 {
		 MID_PLUS:
		    {   int x_fline = g_fline;
			char *x_bufp = g_bufp;
			int x_state = g_state;

			if (moved(line, bufp, state) &&
				match(re-1, g_fline, g_bufp, g_state))
			  return(true);
			else
			 {  line = x_fline; bufp = x_bufp; state = x_state;
			    re += *re; continue;
			 }
		     }
		 }
		else
		 { re--; break; }

	    /************************************************************
	     *                                                          *
	     *          handle sets of characters                       *
	     *                                                          *
	     ************************************************************/

	    case p_INSET:
	    case p_OUTSET:
		if (state==ENDLINE)
		 { re--; break; }
		else
		 {  int res = matchset((c = *bufp), re+1, state);

		    if ((re[-1]==p_INSET && res==0) ||
			(re[-1]==p_OUTSET && res!=0) )
		      return(false);
		    else
		     {  if (c==0)
			 {  if (res==1) state=ENDLINE; /* found it */
			    else
			     { re--; break; } /* not if you ain't looking */
			 }
			else
			 {  state=MIDLINE;
			    if (res!=2) bufp++; /* see action on BOL */
			 }
			re += *re;
			continue;
		     }
		 }

	    case p_BRA:
		{   struct BR *b = &br_note[*re++];

		    b->bsline = line;
		    b->bscol = bufp-cachebuf(line)+MARGIN;
		    b->bfline = -1;
		    continue;
		}

	    case p_KET:
		{   struct BR *b = &br_note[*re++];

		    b->bfline = -1;

		    if (match(re, line, bufp, state))
		     {  b->bfline = line;
			b->bfcol = bufp-cachebuf(line)+MARGIN;
			return(true);
		     }
		    else
		      return(false);
		}

	    default:
		editerror("invalid pattern type (%d) in match", re[-1]);
	 }

	/* end of switch - come here on break and if end-of-line matched
	 * read in next line. HOWEVER -
	 * do not match past end of file, and do not match more than a
	 * single screen full of data
	 */
	if (state==ENDLINE) /* start a new line */
	  state=BEGLINE;
	else
	 {
#ifdef DBUG
	    if (dbug('c'))
	      dbugprintf("match failed at re %8 line %d, buffer %d, state %d (space %d)",
		re, line, bufp-cachebuf(line), state, ((char *)&state)-curr_break);
#endif
	    return(false);
	 }

	/* get the next line */
    nxline:
	line++;
	if (line>mt_line || line-ms_line>=NINROWS) return(false);
	else
	  bufp = cachebuf(line);

     } /* end of while (true) */
 }
/* end of match procedure */

/************************************************************************
 *                                                                      *
 *      procedure to help with iterated matches, to guard               *
 *      against non-moving (infinite) iterations                        *
 *                                                                      *
 ************************************************************************/

int moved(line, bufp, state)
int line; char *bufp; int state;
 { return(line!=g_fline || bufp!=g_bufp || state!=g_state); }

/************************************************************************
 *                                                                      *
 * procedure to help with matching character sets - e.g. '[a-z0-9']     *
 *                                                                      *
 ************************************************************************/

/* memo to self : do something about efficiency of 0-9, a-z etc. */
int matchset(c, set, state)
char c;
char *set;
int state;
 {  while(true)
      switch (*set++)
	{   case p_CHAR:
		if (c==*set++) return(1);
		else continue;

	    case p_BOL:
		if (state==BEGLINE) return(2);
		else continue;

	    case p_EOL:
		if (c==0) return(1);
		else continue;

	    case p_ONEOF:
		{   char c1 = *set++;
		    char c2 = *set++;

		    if (c1<=c && c<=c2)
		     { state=MIDLINE; return(1); }
		    else continue;
		}

	    case p_FIN:
		return(0);

	    default:
		editerror("invalid set type (%d) in matchset", set[-1]);
	 }
 }