#include "ded.h" #include "match.h" #include "char.h" /************************************************************************ * * * copyright Richard Bornat 1981 * * * ************************************************************************/ /************************************************************************ * backtracking pattern matcher, capable of making multi-line matches * * * * the parameters are as follows - * * * * re : an RE pattern, built by build_re * * * * line, bufp : the current match position * * * * state : a variable which is * * BEGLINE at start of match or start of line, * * MIDLINE in the middle of a line, * * ENDLINE after recognising end-of-line * * and LBREAK when noticing control marks at end-of-line * * * * NOTE : multi-line matches carry a run-time overhead, * * though by providing a special screen buffer I avoid use of the * * heap. The screen buffer manipulations are efficient and clear * * (I hope!) and in fact this procedure would be more complicated * * and less efficient had it used the heap or the stack. * ************************************************************************/ int match(re, line, bufp, state) register char *re; int line; register char *bufp; int state; { register char c; #ifdef DBUG if (dbug('c')) dbugprintf("matching at re %8 line %d, buffer %d, state %d (space %d)", re, line, bufp-cachebuf(line), state, ((char *)&state)-curr_break); #endif while (true) { if (*bufp==c_CONTROL && state!=LBREAK && bufp[1]==0) { if (match(re, line, bufp, LBREAK)) /* try it with the mark */ return(true); else /* pretend it isn't there */ goto nxline; } else switch (*re++) { case p_FIN: /* success !!!! */ #ifdef DBUG if (dbug('c')) dbugprintf("match found at re %8 line %d, buffer %d, state %d (space %d)", re, line, bufp-cachebuf(line), state, ((char *)&state)-curr_break); #endif if (line>mt_line || (line==mt_line && bufp-cachebuf(line)+MARGIN>mt_col)) return(false); else { g_fline = line; g_bufp = bufp; g_state = state; return(true); } case p_CHAR: if (*re++ != *bufp++) { re -= 2; break; } else { state = MIDLINE; continue; } /* separator between words (') */ case p_SEP: if ((c = *bufp)==c_SPACE || (c==0 && state!=ENDLINE)) { while (*bufp==c_SPACE) bufp++; /* tricky coding to allow for endline * and invisible line-breaks */ if (*bufp==0) state = ENDLINE; else state = MIDLINE; if (match(re-1, line, bufp, state)) return(true); else continue; } else { re--; break; } case p_ANY: if (*bufp++==0) { re--; break; } else { state=MIDLINE; continue; } case p_BOL: if (state==BEGLINE && bufp==cachebuf(line)) { state=MIDLINE; continue; } else { if (*bufp==0 && state!=BEGLINE) state=ENDLINE; re--; break; } case p_nBOL: /* this is needed to implement silly sets */ if (state==BEGLINE && bufp==cachebuf(line)) return(false); else continue; case p_EOL: /* this code implements multiple end-line * characters rather neatly */ if (state==ENDLINE) { re--; break; } else if (*bufp != 0) return(false); else { state = ENDLINE; continue; } case p_TWOSTAR: /* almost as ONESTAR below, but first * looks for the SHORTEST matching string */ if ( match(re+*re, line, bufp, state)) return(true); /* else fall through */ case p_ONESTAR: /* There follows an iterated alternative. * - *re is a jumparound marker * - the alternative starts at re+1 * - search is for LONGEST matching string * with protection against non-moving search */ if ( match(re+1, line, bufp, state) && moved(line, bufp, state) && match(re-1, g_fline, g_bufp, g_state)) return(true); else /* ignore iteration */ { re += *re; continue; } case p_TWOPLUS: /* almost as ONEPLUS below, but first * looks for the SHORTEST matching string */ if (match(re+1, line, bufp, state)) { if (match(re+*re, g_fline, g_bufp, g_state)) return(true); else goto MID_PLUS; /* sorry */ } else { re--; break; } case p_ONEPLUS: /* There follows an iterated alternative, which must * occur at least once. * - see ONESTAR above for explanation of code. */ if (match(re+1, line, bufp, state)) { MID_PLUS: { int x_fline = g_fline; char *x_bufp = g_bufp; int x_state = g_state; if (moved(line, bufp, state) && match(re-1, g_fline, g_bufp, g_state)) return(true); else { line = x_fline; bufp = x_bufp; state = x_state; re += *re; continue; } } } else { re--; break; } /************************************************************ * * * handle sets of characters * * * ************************************************************/ case p_INSET: case p_OUTSET: if (state==ENDLINE) { re--; break; } else { int res = matchset((c = *bufp), re+1, state); if ((re[-1]==p_INSET && res==0) || (re[-1]==p_OUTSET && res!=0) ) return(false); else { if (c==0) { if (res==1) state=ENDLINE; /* found it */ else { re--; break; } /* not if you ain't looking */ } else { state=MIDLINE; if (res!=2) bufp++; /* see action on BOL */ } re += *re; continue; } } case p_BRA: { struct BR *b = &br_note[*re++]; b->bsline = line; b->bscol = bufp-cachebuf(line)+MARGIN; b->bfline = -1; continue; } case p_KET: { struct BR *b = &br_note[*re++]; b->bfline = -1; if (match(re, line, bufp, state)) { b->bfline = line; b->bfcol = bufp-cachebuf(line)+MARGIN; return(true); } else return(false); } default: editerror("invalid pattern type (%d) in match", re[-1]); } /* end of switch - come here on break and if end-of-line matched * read in next line. HOWEVER - * do not match past end of file, and do not match more than a * single screen full of data */ if (state==ENDLINE) /* start a new line */ state=BEGLINE; else { #ifdef DBUG if (dbug('c')) dbugprintf("match failed at re %8 line %d, buffer %d, state %d (space %d)", re, line, bufp-cachebuf(line), state, ((char *)&state)-curr_break); #endif return(false); } /* get the next line */ nxline: line++; if (line>mt_line || line-ms_line>=NINROWS) return(false); else bufp = cachebuf(line); } /* end of while (true) */ } /* end of match procedure */ /************************************************************************ * * * procedure to help with iterated matches, to guard * * against non-moving (infinite) iterations * * * ************************************************************************/ int moved(line, bufp, state) int line; char *bufp; int state; { return(line!=g_fline || bufp!=g_bufp || state!=g_state); } /************************************************************************ * * * procedure to help with matching character sets - e.g. '[a-z0-9'] * * * ************************************************************************/ /* memo to self : do something about efficiency of 0-9, a-z etc. */ int matchset(c, set, state) char c; char *set; int state; { while(true) switch (*set++) { case p_CHAR: if (c==*set++) return(1); else continue; case p_BOL: if (state==BEGLINE) return(2); else continue; case p_EOL: if (c==0) return(1); else continue; case p_ONEOF: { char c1 = *set++; char c2 = *set++; if (c1<=c && c<=c2) { state=MIDLINE; return(1); } else continue; } case p_FIN: return(0); default: editerror("invalid set type (%d) in matchset", set[-1]); } }