#include "ded.h"
#include "match.h"

/************************************************************************
 *									*
 *		copyright Richard Bornat 1981				*
 *									*
 ************************************************************************/

/*
 * Compiler from the textual match-expression syntax (in which ' is the
 * escape character) into the coded form stored in RE.
 *
 * The coded form, as far as this file shows, is a sequence of opcodes:
 *	p_CHAR c		a literal character
 *	p_ONEOF lo hi		a character range (inside sets only)
 *	p_ANY, p_BOL, p_EOL, p_SEP, p_nBOL	single-byte items
 *	p_BRA n ... p_KET n	bracketed sub-expression number n
 *	<type> <offset> ... p_FIN
 *				a modifier or set wrapped round its body,
 *				see forward()
 * and the whole expression is closed by a final p_FIN.
 *
 * Errors are reported through fdiag().  The code is written on the
 * assumption that fdiag() does not return to its caller (several switch
 * arms would otherwise fall through into unrelated cases, and the loop
 * in b_re would never terminate) - NOTE(review): confirm against the
 * definition of fdiag.
 */

char *re_p,	/* next free position in RE */
     *re_lim;	/* RE+RESIZE; re_p beyond this means the RE is too long */

char blown[] = "!! RE too long";
char missing[] = "!! '%c missing";
char extra[] = "!! unmatched '%c";

/* forward declarations */
char *b_re(), *build_set();

/************************************************************************
 * build_re: compile the match expression at 'str' into RE.		*
 * Resets the output pointer, the limit and the bracket counter,	*
 * compiles with no closing character expected, then appends the	*
 * terminating p_FIN.  Returns the pointer handed back by b_re, i.e.	*
 * the unconsumed tail of the input string.				*
 ************************************************************************/
char *build_re(str)
char *str;
{
	char *s;

	re_p = RE;
	re_lim = RE+RESIZE;
	br_count = 0;

	s = b_re(str, 0);

	/* the terminator is stored first and the limit tested afterwards */
	*re_p++ = p_FIN;
	if (re_p>re_lim)
		fdiag(blown);

#ifdef DBUG
	/* what am I producing */
	if (dbug('a'))
		print_re();
#endif

	return(s);
}

/************************************************************************
 * the routine that really does the work.  Parameter names are, I hope, *
 * self-explanatory.  It eats a string, putting the result in 're'	*
 * and returning a pointer to the tail of the string.  Only called	*
 * from build_re and recursively within itself				*
 *									*
 * str	  - text still to be compiled					*
 * fin_ch - 0 at the outermost level, otherwise the character which,	*
 *	    when escaped with ', closes this level (')' for a '( group)	*
 *									*
 * At the outermost level the routine returns on end of string		*
 * (pointing at the NUL) or on an unescaped ';' or '/' (pointing just	*
 * past it).  At an inner level it returns just past the closing	*
 * 'fin_ch sequence; reaching any of the outer terminators first is	*
 * reported as a missing closer.					*
 ************************************************************************/
char *b_re(str,fin_ch)
register char *str;
char fin_ch;
{
	while (true) {
		char *me_p = re_p;	/* place at which latest match-expression starts */
		register char c;

		if (re_p>re_lim)
			fdiag(blown);
		else
		switch (c = *str++) {
		case '\0':
			if (fin_ch==0)
				return(str-1);
			else
				fdiag(missing, fin_ch);
			/* not reached if fdiag does not return */

		case '\'':
			/* escape: the next character selects a special item */
			switch (c = *str++) {
			case '[':
				str = build_set(str);
				break;

			case '(':
				if (++br_count>=BRLIM)
					fdiag("!! too many brackets !!");
				else {
					/* keep our own number: nested groups advance br_count */
					int count = br_count;

					*re_p++ = p_BRA;
					*re_p++ = count;
					str = b_re(str,')');
					*re_p++ = p_KET;
					*re_p++ = count;
					break;
				}
				/* not reached if fdiag does not return */

			case '.':
				*re_p++ = p_ANY;
				break;

			case '^':
				*re_p++ = p_BOL;
				break;

			case '$':
				*re_p++ = p_EOL;
				break;

			case '\0':
				/* treat as trailing space */
				str--;	/* leave the NUL for the next iteration to see */
				/* fall through */
			case ' ':
				*re_p++ = p_SEP;
				break;

			case '?':
			case '*':
			case '+':
				/* a modifier with nothing in front of it to modify */
				fdiag("match expression starts with '%c",c);
				break;

			case ']':
			case ')':
				if (fin_ch==c)
					return(str);
				else
					fdiag(extra, c);
				/* not reached if fdiag does not return */

			case '\'':
			case '/':
			case ';':
				/* escaped special character taken literally */
				*re_p++ = p_CHAR;
				*re_p++ = c;
				break;

			default:
				fdiag("?? invalid sequence '%c ??", c);
			}
			break;

		case ';':
		case '/':
			if (fin_ch==0)
				return(str);
			else
				fdiag(missing, fin_ch);
			/* not reached if fdiag does not return */

		default:
			*re_p++ = p_CHAR;
			*re_p++ = c;
			break;
		}

		/* the above matches one 'match-expression', which may be
		 * followed by '?, '*, '**, '+ or '++ to specify option or
		 * repetition. If such modifiers are repeated, it is just silly
		 * but not necessarily an error.
		 */
		while (*str=='\'') {
			char mod_type;

			/* Each modifier adds three more bytes to the RE (a p_FIN
			 * plus the two bytes inserted by forward), so test the
			 * limit before every one of them rather than only once
			 * per match-expression.
			 */
			if (re_p>re_lim)
				fdiag(blown);

			str++;
			switch (c = *str++) {
			case '*':
			case '+':
				/* look for ** and ++ */
				if (*str==c) {
					str++;
					mod_type = (c=='*' ? p_TWOSTAR : p_TWOPLUS);
				}
				else
					mod_type = (c=='*' ? p_ONESTAR : p_ONEPLUS);
				break;

			case '?':
				mod_type = p_QUERY;
				break;

			default:
				/* whoops - it isn't a modifier at all */
				str -= 2;	/* back up over the ' and the character after it */
				goto endouter;
			} /* end of discrimination between modifiers */

			/* close the modified expression and wrap the modifier round it */
			*re_p++ = p_FIN;
			forward(me_p, mod_type);
		} /* end of while which searches for modifiers */

	endouter: ; /* horrid label */
	} /* end of while (true) */
}

/************************************************************************
 *									*
 *		insert a forward ref in the RE				*
 *									*
 * Opens a two-byte gap at at_p by moving everything from at_p up to	*
 * and including the byte at re_p two places higher, and advances re_p	*
 * by two.  The gap receives 'type' followed by an offset byte: the	*
 * distance from the offset byte itself to the new re_p.  An offset	*
 * greater than 127 is reported as "RE too long" (presumably because	*
 * it has to fit in a char - confirm against the matcher).		*
 *									*
 * The caller must leave room for the two extra bytes; no limit test	*
 * is made here.							*
 ************************************************************************/
forward(at_p, type)
register char *at_p;
char type;
{
	register char *re1 = re_p, *re2 = re_p+2;
	int diff;

	re_p = re2;

	/* copy downwards from the top so that the overlapping move is safe */
	while (re1>=at_p)
		*re2-- = *re1--;

	*at_p++ = type;
	if ((diff = re_p-at_p)>127)
		fdiag(blown);
	else
		*at_p = diff;
}

/************************************************************************
 *									*
 *		procedure to build a representation of a		*
 *		set of characters such as '[a-z0-9'$,+']		*
 *									*
 * str points just past the opening '[ ; the result points just past	*
 * the closing '] .  A leading '-' makes it an excluded set (p_OUTSET	*
 * instead of p_INSET).  Elements are coded as p_CHAR c, p_ONEOF lo hi,	*
 * p_BOL or p_EOL, closed by p_FIN, and the whole is then wrapped by	*
 * forward() with the set type.  A set holding nothing but '^ is left	*
 * as a bare p_BOL (p_nBOL when excluded) with no wrapper.		*
 ************************************************************************/
char *build_set(str)
char *str;
{
	register char c, c1;
	char *me_p = re_p;	/* start of this set in the RE */
	char type = p_INSET;

	if (*str=='-') {
		type = p_OUTSET;
		str++;
	}

	while (true) {
		/* Every element appends up to three bytes, and the caller only
		 * tests the limit once the whole set has been built, so test
		 * it here before each element to stop a long set running off
		 * the end of the RE.
		 */
		if (re_p>re_lim)
			fdiag(blown);

		switch (c = *str++) {
		case '\'':
			switch (c = *str++) {
			case ']':
				/* cope with set which contains only BOL */
				if (re_p==me_p+1 && *me_p==p_BOL) {
					if (type==p_OUTSET)
						*me_p=p_nBOL;
					return(str);
				}
				else {
					*re_p++ = p_FIN;
					forward(me_p, type);
					return(str);
				}

			case '\0':
				fdiag(missing, ']');
				/* not reached if fdiag does not return */

			case '^':
				*re_p++ = p_BOL;
				continue;

			case '$':
				*re_p++ = p_EOL;
				continue;

			case '\'':
			case '/':
			case ';':
				/* escaped special character taken literally */
				*re_p++ = p_CHAR;
				*re_p++ = c;
				continue;

			default:
				fdiag("?? invalid sequence '['%c ??", c);
			}
			/* not reached if fdiag does not return */

		case '\0':
			fdiag(missing, ']');
			/* not reached if fdiag does not return */

		default:
			if (*str=='-') {
				/* a range c-c1: the upper bound must be an ordinary character */
				str++;
				if ( (c1 = *str++)=='\'' || c1==0)
					fdiag("?? character expected after dash ??");
				else
				if (c>c1)
					fdiag("?? invalid sequence %c-%c ??",c,c1);
				else {
					*re_p++ = p_ONEOF;
					*re_p++ = c;
					*re_p++ = c1;
				}
			}
			else {
				*re_p++ = p_CHAR;
				*re_p++ = c;
			}
			continue;
		}
	}
}

#ifdef DBUG
print_re()
{
	register char *re1 = RE, *re2 = re_p;

	while (re1