From 81ffcefad2a59e392ce280806dd3d2d960e7d06d Mon Sep 17 00:00:00 2001 From: "Michael D. Lowis" Date: Tue, 11 Dec 2018 15:06:35 -0500 Subject: [PATCH] tabs to spaces --- Makefile | 2 +- regaux.c | 154 ++++----- regcomp.c | 894 ++++++++++++++++++++++++++--------------------------- regerror.c | 12 +- regexec.c | 402 ++++++++++++------------ regexp9.h | 105 +++---- regsub.c | 104 +++---- rregexec.c | 364 +++++++++++----------- rregsub.c | 110 +++---- test.c | 62 ++-- test2.c | 22 +- utf.c | 254 +++++++-------- 12 files changed, 1236 insertions(+), 1249 deletions(-) diff --git a/Makefile b/Makefile index 5f7f904..bad630e 100644 --- a/Makefile +++ b/Makefile @@ -1,5 +1,5 @@ CC=gcc -CFLAGS+=-Wall -Wextra -O3 -c -g +CFLAGS += --std=c99 -pedantic -Wall -Wextra -O3 -c -g O=o LIB=libregexp9.a diff --git a/regaux.c b/regaux.c index 46fb250..0d73b29 100644 --- a/regaux.c +++ b/regaux.c @@ -9,50 +9,50 @@ extern void _renewmatch(Resub *mp, int ms, Resublist *sp) { - int i; + int i; - if(mp==0 || ms<=0) - return; - if(mp[0].s.sp==0 || sp->m[0].s.spm[0].s.sp==mp[0].s.sp && sp->m[0].e.ep>mp[0].e.ep)){ - for(i=0; im[i]; - for(; im[0].s.spm[0].s.sp==mp[0].s.sp && sp->m[0].e.ep>mp[0].e.ep)){ + for(i=0; im[i]; + for(; iinst; p++){ - if(p->inst == ip){ - if(sep->m[0].s.sp < p->se.m[0].s.sp){ - if(ms > 1) - p->se = *sep; - else - p->se.m[0] = sep->m[0]; - } - return 0; - } - } - p->inst = ip; - if(ms > 1) - p->se = *sep; - else - p->se.m[0] = sep->m[0]; - (++p)->inst = 0; - return p; + for(p=lp; p->inst; p++){ + if(p->inst == ip){ + if(sep->m[0].s.sp < p->se.m[0].s.sp){ + if(ms > 1) + p->se = *sep; + else + p->se.m[0] = sep->m[0]; + } + return 0; + } + } + p->inst = ip; + if(ms > 1) + p->se = *sep; + else + p->se.m[0] = sep->m[0]; + (++p)->inst = 0; + return p; } /* @@ -60,53 +60,53 @@ _renewthread(Relist *lp, /* _relist to add to */ * initial empty start pointer. */ extern Relist* -_renewemptythread(Relist *lp, /* _relist to add to */ - Reinst *ip, /* instruction to add */ - int ms, - char *sp) /* pointers to subexpressions */ +_renewemptythread(Relist *lp, /* _relist to add to */ + Reinst *ip, /* instruction to add */ + int ms, + char *sp) /* pointers to subexpressions */ { - Relist *p; + Relist *p; - for(p=lp; p->inst; p++){ - if(p->inst == ip){ - if(sp < p->se.m[0].s.sp) { - if(ms > 1) - memset(&p->se, 0, sizeof(p->se)); - p->se.m[0].s.sp = sp; - } - return 0; - } - } - p->inst = ip; - if(ms > 1) - memset(&p->se, 0, sizeof(p->se)); - p->se.m[0].s.sp = sp; - (++p)->inst = 0; - return p; + for(p=lp; p->inst; p++){ + if(p->inst == ip){ + if(sp < p->se.m[0].s.sp) { + if(ms > 1) + memset(&p->se, 0, sizeof(p->se)); + p->se.m[0].s.sp = sp; + } + return 0; + } + } + p->inst = ip; + if(ms > 1) + memset(&p->se, 0, sizeof(p->se)); + p->se.m[0].s.sp = sp; + (++p)->inst = 0; + return p; } extern Relist* -_rrenewemptythread(Relist *lp, /* _relist to add to */ - Reinst *ip, /* instruction to add */ - int ms, - Rune *rsp) /* pointers to subexpressions */ +_rrenewemptythread(Relist *lp, /* _relist to add to */ + Reinst *ip, /* instruction to add */ + int ms, + Rune *rsp) /* pointers to subexpressions */ { - Relist *p; + Relist *p; - for(p=lp; p->inst; p++){ - if(p->inst == ip){ - if(rsp < p->se.m[0].s.rsp) { - if(ms > 1) - memset(&p->se, 0, sizeof(p->se)); - p->se.m[0].s.rsp = rsp; - } - return 0; - } - } - p->inst = ip; - if(ms > 1) - memset(&p->se, 0, sizeof(p->se)); - p->se.m[0].s.rsp = rsp; - (++p)->inst = 0; - return p; + for(p=lp; p->inst; p++){ + if(p->inst == ip){ + if(rsp < p->se.m[0].s.rsp) { + if(ms > 1) + memset(&p->se, 0, sizeof(p->se)); + p->se.m[0].s.rsp = rsp; + } + return 0; + } + } + p->inst = ip; + if(ms > 1) + memset(&p->se, 0, sizeof(p->se)); + p->se.m[0].s.rsp = rsp; + (++p)->inst = 0; + return p; } diff --git a/regcomp.c b/regcomp.c index 29fa49e..cb15d17 100644 --- a/regcomp.c +++ b/regcomp.c @@ -5,8 +5,8 @@ #include "regexp9.h" #include "regcomp.h" -#define TRUE 1 -#define FALSE 0 +#define TRUE 1 +#define FALSE 0 /* * Parser Information @@ -14,556 +14,556 @@ typedef struct Node { - Reinst* first; - Reinst* last; + Reinst* first; + Reinst* last; }Node; /* max character classes per program is nelem(reprog->class) */ -static Reprog *reprog; +static Reprog *reprog; /* max rune ranges per character class is nelem(classp->spans)/2 */ -#define NCCRUNE nelem(classp->spans) - -#define NSTACK 20 -static Node andstack[NSTACK]; -static Node *andp; -static int atorstack[NSTACK]; -static int* atorp; -static int cursubid; /* id of current subexpression */ -static int subidstack[NSTACK]; /* parallel to atorstack */ -static int* subidp; -static int lastwasand; /* Last token was operand */ -static int nbra; -static char* exprp; /* pointer to next character in source expression */ -static int lexdone; -static unsigned int nclass; -static Reclass*classp; -static Reinst* freep; -static int errors; -static Rune yyrune; /* last lex'd rune */ -static Reclass*yyclassp; /* last lex'd class */ +#define NCCRUNE nelem(classp->spans) + +#define NSTACK 20 +static Node andstack[NSTACK]; +static Node *andp; +static int atorstack[NSTACK]; +static int* atorp; +static int cursubid; /* id of current subexpression */ +static int subidstack[NSTACK]; /* parallel to atorstack */ +static int* subidp; +static int lastwasand; /* Last token was operand */ +static int nbra; +static char* exprp; /* pointer to next character in source expression */ +static int lexdone; +static unsigned int nclass; +static Reclass*classp; +static Reinst* freep; +static int errors; +static Rune yyrune; /* last lex'd rune */ +static Reclass*yyclassp; /* last lex'd class */ /* predeclared crap */ -static void operator(int); -static void pushand(Reinst*, Reinst*); -static void pushator(int); -static void evaluntil(int); -static int bldcclass(void); +static void operator(int); +static void pushand(Reinst*, Reinst*); +static void pushator(int); +static void evaluntil(int); +static int bldcclass(void); static jmp_buf regkaboom; -static void +static void rcerror(char *s) { - errors++; - regerror(s); - longjmp(regkaboom, 1); + errors++; + regerror(s); + longjmp(regkaboom, 1); } -static Reinst* +static Reinst* newinst(int t) { - freep->type = t; - freep->l.left = 0; - freep->r.right = 0; - return freep++; + freep->type = t; + freep->l.left = 0; + freep->r.right = 0; + return freep++; } -static void +static void operand(int t) { - Reinst *i; + Reinst *i; - if(lastwasand) - operator(CAT); /* catenate is implicit */ - i = newinst(t); + if(lastwasand) + operator(CAT); /* catenate is implicit */ + i = newinst(t); - if(t == CCLASS || t == NCCLASS) - i->r.cp = yyclassp; - if(t == RUNE) - i->r.r = yyrune; + if(t == CCLASS || t == NCCLASS) + i->r.cp = yyclassp; + if(t == RUNE) + i->r.r = yyrune; - pushand(i, i); - lastwasand = TRUE; + pushand(i, i); + lastwasand = TRUE; } -static void +static void operator(int t) { - if(t==RBRA && --nbra<0) - rcerror("unmatched right paren"); - if(t==LBRA){ - if(++cursubid >= NSUBEXP) - rcerror("too many subexpressions"); - nbra++; - if(lastwasand) - operator(CAT); - }else - evaluntil(t); - if(t != RBRA) - pushator(t); - lastwasand = FALSE; - if(t==STAR || t==QUEST || t==PLUS || t==RBRA) - lastwasand = TRUE; /* these look like operands */ + if(t==RBRA && --nbra<0) + rcerror("unmatched right paren"); + if(t==LBRA){ + if(++cursubid >= NSUBEXP) + rcerror("too many subexpressions"); + nbra++; + if(lastwasand) + operator(CAT); + }else + evaluntil(t); + if(t != RBRA) + pushator(t); + lastwasand = FALSE; + if(t==STAR || t==QUEST || t==PLUS || t==RBRA) + lastwasand = TRUE; /* these look like operands */ } -static void +static void regerr2(char *s, int c) { - char buf[100]; - char *cp = buf; - while(*s) - *cp++ = *s++; - *cp++ = c; - *cp = '\0'; - rcerror(buf); + char buf[100]; + char *cp = buf; + while(*s) + *cp++ = *s++; + *cp++ = c; + *cp = '\0'; + rcerror(buf); } -static void +static void cant(char *s) { - char buf[100]; - strncpy(buf, "can't happen: ", sizeof(buf)); - strncat(buf, s, sizeof(buf)-1); - rcerror(buf); + char buf[100]; + strncpy(buf, "can't happen: ", sizeof(buf)); + strncat(buf, s, sizeof(buf)-1); + rcerror(buf); } -static void +static void pushand(Reinst *f, Reinst *l) { - if(andp >= &andstack[NSTACK]) - cant("operand stack overflow"); - andp->first = f; - andp->last = l; - andp++; + if(andp >= &andstack[NSTACK]) + cant("operand stack overflow"); + andp->first = f; + andp->last = l; + andp++; } -static void +static void pushator(int t) { - if(atorp >= &atorstack[NSTACK]) - cant("operator stack overflow"); - *atorp++ = t; - *subidp++ = cursubid; + if(atorp >= &atorstack[NSTACK]) + cant("operator stack overflow"); + *atorp++ = t; + *subidp++ = cursubid; } -static Node* +static Node* popand(int op) { - Reinst *inst; - - if(andp <= &andstack[0]){ - regerr2("missing operand for ", op); - inst = newinst(NOP); - pushand(inst,inst); - } - return --andp; + Reinst *inst; + + if(andp <= &andstack[0]){ + regerr2("missing operand for ", op); + inst = newinst(NOP); + pushand(inst,inst); + } + return --andp; } -static int +static int popator(void) { - if(atorp <= &atorstack[0]) - cant("operator stack underflow"); - --subidp; - return *--atorp; + if(atorp <= &atorstack[0]) + cant("operator stack underflow"); + --subidp; + return *--atorp; } -static void +static void evaluntil(int pri) { - Node *op1, *op2; - Reinst *inst1, *inst2; - - while(pri==RBRA || atorp[-1]>=pri){ - switch(popator()){ - default: - rcerror("unknown operator in evaluntil"); - break; - case LBRA: /* must have been RBRA */ - op1 = popand('('); - inst2 = newinst(RBRA); - inst2->r.subid = *subidp; - op1->last->l.next = inst2; - inst1 = newinst(LBRA); - inst1->r.subid = *subidp; - inst1->l.next = op1->first; - pushand(inst1, inst2); - return; - case OR: - op2 = popand('|'); - op1 = popand('|'); - inst2 = newinst(NOP); - op2->last->l.next = inst2; - op1->last->l.next = inst2; - inst1 = newinst(OR); - inst1->r.right = op1->first; - inst1->l.left = op2->first; - pushand(inst1, inst2); - break; - case CAT: - op2 = popand(0); - op1 = popand(0); - op1->last->l.next = op2->first; - pushand(op1->first, op2->last); - break; - case STAR: - op2 = popand('*'); - inst1 = newinst(OR); - op2->last->l.next = inst1; - inst1->r.right = op2->first; - pushand(inst1, inst1); - break; - case PLUS: - op2 = popand('+'); - inst1 = newinst(OR); - op2->last->l.next = inst1; - inst1->r.right = op2->first; - pushand(op2->first, inst1); - break; - case QUEST: - op2 = popand('?'); - inst1 = newinst(OR); - inst2 = newinst(NOP); - inst1->l.left = inst2; - inst1->r.right = op2->first; - op2->last->l.next = inst2; - pushand(inst1, inst2); - break; - } - } + Node *op1, *op2; + Reinst *inst1, *inst2; + + while(pri==RBRA || atorp[-1]>=pri){ + switch(popator()){ + default: + rcerror("unknown operator in evaluntil"); + break; + case LBRA: /* must have been RBRA */ + op1 = popand('('); + inst2 = newinst(RBRA); + inst2->r.subid = *subidp; + op1->last->l.next = inst2; + inst1 = newinst(LBRA); + inst1->r.subid = *subidp; + inst1->l.next = op1->first; + pushand(inst1, inst2); + return; + case OR: + op2 = popand('|'); + op1 = popand('|'); + inst2 = newinst(NOP); + op2->last->l.next = inst2; + op1->last->l.next = inst2; + inst1 = newinst(OR); + inst1->r.right = op1->first; + inst1->l.left = op2->first; + pushand(inst1, inst2); + break; + case CAT: + op2 = popand(0); + op1 = popand(0); + op1->last->l.next = op2->first; + pushand(op1->first, op2->last); + break; + case STAR: + op2 = popand('*'); + inst1 = newinst(OR); + op2->last->l.next = inst1; + inst1->r.right = op2->first; + pushand(inst1, inst1); + break; + case PLUS: + op2 = popand('+'); + inst1 = newinst(OR); + op2->last->l.next = inst1; + inst1->r.right = op2->first; + pushand(op2->first, inst1); + break; + case QUEST: + op2 = popand('?'); + inst1 = newinst(OR); + inst2 = newinst(NOP); + inst1->l.left = inst2; + inst1->r.right = op2->first; + op2->last->l.next = inst2; + pushand(inst1, inst2); + break; + } + } } -static Reprog* +static Reprog* optimize(Reprog *pp) { - Reinst *inst, *target; - int size; - Reprog *npp; - Reclass *cl; - int diff; - - /* - * get rid of NOOP chains - */ - for(inst=pp->firstinst; inst->type!=END; inst++){ - target = inst->l.next; - while(target->type == NOP) - target = target->l.next; - inst->l.next = target; - } - - /* - * The original allocation is for an area larger than - * necessary. Reallocate to the actual space used - * and then relocate the code. - */ - size = sizeof(Reprog) + (freep - pp->firstinst)*sizeof(Reinst); - npp = realloc(pp, size); - if(npp==0 || npp==pp) - return pp; - diff = (char *)npp - (char *)pp; - freep = (Reinst *)((char *)freep + diff); - for(inst=npp->firstinst; insttype){ - case OR: - case STAR: - case PLUS: - case QUEST: - inst->r.right = (void*)((char*)inst->r.right + diff); - break; - case CCLASS: - case NCCLASS: - inst->r.right = (void*)((char*)inst->r.right + diff); - cl = inst->r.cp; - cl->end = (void*)((char*)cl->end + diff); - break; - } - inst->l.left = (void*)((char*)inst->l.left + diff); - } - npp->startinst = (void*)((char*)npp->startinst + diff); - return npp; + Reinst *inst, *target; + int size; + Reprog *npp; + Reclass *cl; + int diff; + + /* + * get rid of NOOP chains + */ + for(inst=pp->firstinst; inst->type!=END; inst++){ + target = inst->l.next; + while(target->type == NOP) + target = target->l.next; + inst->l.next = target; + } + + /* + * The original allocation is for an area larger than + * necessary. Reallocate to the actual space used + * and then relocate the code. + */ + size = sizeof(Reprog) + (freep - pp->firstinst)*sizeof(Reinst); + npp = realloc(pp, size); + if(npp==0 || npp==pp) + return pp; + diff = (char *)npp - (char *)pp; + freep = (Reinst *)((char *)freep + diff); + for(inst=npp->firstinst; insttype){ + case OR: + case STAR: + case PLUS: + case QUEST: + inst->r.right = (void*)((char*)inst->r.right + diff); + break; + case CCLASS: + case NCCLASS: + inst->r.right = (void*)((char*)inst->r.right + diff); + cl = inst->r.cp; + cl->end = (void*)((char*)cl->end + diff); + break; + } + inst->l.left = (void*)((char*)inst->l.left + diff); + } + npp->startinst = (void*)((char*)npp->startinst + diff); + return npp; } -#ifdef DEBUG -static void +#ifdef DEBUG +static void dumpstack(void){ - Node *stk; - int *ip; - - printf("operators\n"); - for(ip=atorstack; ipfirst->type, stk->last->type); + Node *stk; + int *ip; + + printf("operators\n"); + for(ip=atorstack; ipfirst->type, stk->last->type); } -static void +static void dump(Reprog *pp) { - Reinst *l; - Rune *p; - - l = pp->firstinst; - do{ - printf("%d:\t0%o\t%d\t%d", (int)(l-pp->firstinst), l->type, - (int)(l->l.left-pp->firstinst), (int)(l->r.right-pp->firstinst)); - if(l->type == RUNE) - printf("\t%C\n", l->r.r); - else if(l->type == CCLASS || l->type == NCCLASS){ - printf("\t["); - if(l->type == NCCLASS) - printf("^"); - for(p = l->r.cp->spans; p < l->r.cp->end; p += 2) - if(p[0] == p[1]) - printf("%C", p[0]); - else - printf("%C-%C", p[0], p[1]); - printf("]\n"); - } else - printf("\n"); - }while(l++->type); + Reinst *l; + Rune *p; + + l = pp->firstinst; + do{ + printf("%d:\t0%o\t%d\t%d", (int)(l-pp->firstinst), l->type, + (int)(l->l.left-pp->firstinst), (int)(l->r.right-pp->firstinst)); + if(l->type == RUNE) + printf("\t%C\n", l->r.r); + else if(l->type == CCLASS || l->type == NCCLASS){ + printf("\t["); + if(l->type == NCCLASS) + printf("^"); + for(p = l->r.cp->spans; p < l->r.cp->end; p += 2) + if(p[0] == p[1]) + printf("%C", p[0]); + else + printf("%C-%C", p[0], p[1]); + printf("]\n"); + } else + printf("\n"); + }while(l++->type); } #endif -static Reclass* +static Reclass* newclass(void) { - if(nclass >= nelem(reprog->class)) - rcerror("too many character classes; increase Reprog.class size"); - return &(classp[nclass++]); + if(nclass >= nelem(reprog->class)) + rcerror("too many character classes; increase Reprog.class size"); + return &(classp[nclass++]); } -static int +static int nextc(Rune *rp) { - if(lexdone){ - *rp = 0; - return 1; - } - exprp += chartorune(rp, exprp); - if(*rp == '\\'){ - exprp += chartorune(rp, exprp); - return 1; - } - if(*rp == 0) - lexdone = 1; - return 0; + if(lexdone){ + *rp = 0; + return 1; + } + exprp += chartorune(rp, exprp); + if(*rp == '\\'){ + exprp += chartorune(rp, exprp); + return 1; + } + if(*rp == 0) + lexdone = 1; + return 0; } -static int +static int lex(int literal, int dot_type) { - int quoted; - - quoted = nextc(&yyrune); - if(literal || quoted){ - if(yyrune == 0) - return END; - return RUNE; - } - - switch(yyrune){ - case 0: - return END; - case '*': - return STAR; - case '?': - return QUEST; - case '+': - return PLUS; - case '|': - return OR; - case '.': - return dot_type; - case '(': - return LBRA; - case ')': - return RBRA; - case '^': - return BOL; - case '$': - return EOL; - case '[': - return bldcclass(); - } - return RUNE; + int quoted; + + quoted = nextc(&yyrune); + if(literal || quoted){ + if(yyrune == 0) + return END; + return RUNE; + } + + switch(yyrune){ + case 0: + return END; + case '*': + return STAR; + case '?': + return QUEST; + case '+': + return PLUS; + case '|': + return OR; + case '.': + return dot_type; + case '(': + return LBRA; + case ')': + return RBRA; + case '^': + return BOL; + case '$': + return EOL; + case '[': + return bldcclass(); + } + return RUNE; } static int bldcclass(void) { - int type; - Rune r[NCCRUNE]; - Rune *p, *ep, *np; - Rune rune; - int quoted; - - /* we have already seen the '[' */ - type = CCLASS; - yyclassp = newclass(); - - /* look ahead for negation */ - /* SPECIAL CASE!!! negated classes don't match \n */ - ep = r; - quoted = nextc(&rune); - if(!quoted && rune == '^'){ - type = NCCLASS; - quoted = nextc(&rune); - *ep++ = '\n'; - *ep++ = '\n'; - } - - /* parse class into a set of spans */ - while(ep < &r[NCCRUNE-1]){ - if(rune == 0){ - rcerror("malformed '[]'"); - return 0; - } - if(!quoted && rune == ']') - break; - if(!quoted && rune == '-'){ - if(ep == r){ - rcerror("malformed '[]'"); - return 0; - } - quoted = nextc(&rune); - if((!quoted && rune == ']') || rune == 0){ - rcerror("malformed '[]'"); - return 0; - } - *(ep-1) = rune; - } else { - *ep++ = rune; - *ep++ = rune; - } - quoted = nextc(&rune); - } - if(ep >= &r[NCCRUNE-1]) { - rcerror("char class too large; increase Reclass.spans size"); - return 0; - } - - /* sort on span start */ - for(p = r; p < ep; p += 2){ - for(np = p; np < ep; np += 2) - if(*np < *p){ - rune = np[0]; - np[0] = p[0]; - p[0] = rune; - rune = np[1]; - np[1] = p[1]; - p[1] = rune; - } - } - - /* merge spans */ - np = yyclassp->spans; - p = r; - if(r == ep) - yyclassp->end = np; - else { - np[0] = *p++; - np[1] = *p++; - for(; p < ep; p += 2) - /* overlapping or adjacent ranges? */ - if(p[0] <= np[1] + 1){ - if(p[1] >= np[1]) - np[1] = p[1]; /* coalesce */ - } else { - np += 2; - np[0] = p[0]; - np[1] = p[1]; - } - yyclassp->end = np+2; - } - - return type; + int type; + Rune r[NCCRUNE]; + Rune *p, *ep, *np; + Rune rune; + int quoted; + + /* we have already seen the '[' */ + type = CCLASS; + yyclassp = newclass(); + + /* look ahead for negation */ + /* SPECIAL CASE!!! negated classes don't match \n */ + ep = r; + quoted = nextc(&rune); + if(!quoted && rune == '^'){ + type = NCCLASS; + quoted = nextc(&rune); + *ep++ = '\n'; + *ep++ = '\n'; + } + + /* parse class into a set of spans */ + while(ep < &r[NCCRUNE-1]){ + if(rune == 0){ + rcerror("malformed '[]'"); + return 0; + } + if(!quoted && rune == ']') + break; + if(!quoted && rune == '-'){ + if(ep == r){ + rcerror("malformed '[]'"); + return 0; + } + quoted = nextc(&rune); + if((!quoted && rune == ']') || rune == 0){ + rcerror("malformed '[]'"); + return 0; + } + *(ep-1) = rune; + } else { + *ep++ = rune; + *ep++ = rune; + } + quoted = nextc(&rune); + } + if(ep >= &r[NCCRUNE-1]) { + rcerror("char class too large; increase Reclass.spans size"); + return 0; + } + + /* sort on span start */ + for(p = r; p < ep; p += 2){ + for(np = p; np < ep; np += 2) + if(*np < *p){ + rune = np[0]; + np[0] = p[0]; + p[0] = rune; + rune = np[1]; + np[1] = p[1]; + p[1] = rune; + } + } + + /* merge spans */ + np = yyclassp->spans; + p = r; + if(r == ep) + yyclassp->end = np; + else { + np[0] = *p++; + np[1] = *p++; + for(; p < ep; p += 2) + /* overlapping or adjacent ranges? */ + if(p[0] <= np[1] + 1){ + if(p[1] >= np[1]) + np[1] = p[1]; /* coalesce */ + } else { + np += 2; + np[0] = p[0]; + np[1] = p[1]; + } + yyclassp->end = np+2; + } + + return type; } -static Reprog* +static Reprog* regcomp1(char *s, int literal, int dot_type) { - int token; - Reprog *volatile pp; - - /* get memory for the program */ - pp = malloc(sizeof(Reprog) + 6*sizeof(Reinst)*strlen(s)); - if(pp == 0){ - regerror("out of memory"); - return 0; - } - freep = pp->firstinst; - classp = pp->class; - errors = 0; - - if(setjmp(regkaboom)) - goto out; - - /* go compile the sucker */ - lexdone = 0; - exprp = s; - nclass = 0; - nbra = 0; - atorp = atorstack; - andp = andstack; - subidp = subidstack; - lastwasand = FALSE; - cursubid = 0; - - /* Start with a low priority operator to prime parser */ - pushator(START-1); - while((token = lex(literal, dot_type)) != END){ - if((token&0300) == OPERATOR) - operator(token); - else - operand(token); - } - - /* Close with a low priority operator */ - evaluntil(START); - - /* Force END */ - operand(END); - evaluntil(START); + int token; + Reprog *volatile pp; + + /* get memory for the program */ + pp = malloc(sizeof(Reprog) + 6*sizeof(Reinst)*strlen(s)); + if(pp == 0){ + regerror("out of memory"); + return 0; + } + freep = pp->firstinst; + classp = pp->class; + errors = 0; + + if(setjmp(regkaboom)) + goto out; + + /* go compile the sucker */ + lexdone = 0; + exprp = s; + nclass = 0; + nbra = 0; + atorp = atorstack; + andp = andstack; + subidp = subidstack; + lastwasand = FALSE; + cursubid = 0; + + /* Start with a low priority operator to prime parser */ + pushator(START-1); + while((token = lex(literal, dot_type)) != END){ + if((token&0300) == OPERATOR) + operator(token); + else + operand(token); + } + + /* Close with a low priority operator */ + evaluntil(START); + + /* Force END */ + operand(END); + evaluntil(START); #ifdef DEBUG - dumpstack(); + dumpstack(); #endif - if(nbra) - rcerror("unmatched left paren"); - --andp; /* points to first and only operand */ - pp->startinst = andp->first; + if(nbra) + rcerror("unmatched left paren"); + --andp; /* points to first and only operand */ + pp->startinst = andp->first; #ifdef DEBUG - dump(pp); + dump(pp); #endif - pp = optimize(pp); + pp = optimize(pp); #ifdef DEBUG - printf("start: %d\n", (int)(andp->first-pp->firstinst)); - dump(pp); + printf("start: %d\n", (int)(andp->first-pp->firstinst)); + dump(pp); #endif out: - if(errors){ - free(pp); - pp = 0; - } - return pp; + if(errors){ + free(pp); + pp = 0; + } + return pp; } -extern Reprog* +extern Reprog* regcomp(char *s) { - return regcomp1(s, 0, ANY); + return regcomp1(s, 0, ANY); } -extern Reprog* +extern Reprog* regcomplit(char *s) { - return regcomp1(s, 1, ANY); + return regcomp1(s, 1, ANY); } -extern Reprog* +extern Reprog* regcompnl(char *s) { - return regcomp1(s, 0, ANYNL); + return regcomp1(s, 0, ANYNL); } diff --git a/regerror.c b/regerror.c index e63f084..66a0e7b 100644 --- a/regerror.c +++ b/regerror.c @@ -6,11 +6,11 @@ void regerror(char *s) { - char buf[132]; + char buf[132]; - strncpy(buf, "regerror: ", sizeof(buf)); - strncat(buf, s, sizeof(buf)-1); - strncat(buf, "\n", sizeof(buf)-1); - write(2, buf, strlen(buf)); - exit(1); + strncpy(buf, "regerror: ", sizeof(buf)); + strncat(buf, s, sizeof(buf)-1); + strncat(buf, "\n", sizeof(buf)-1); + write(2, buf, strlen(buf)); + exit(1); } diff --git a/regexec.c b/regexec.c index df75cf1..4d0124b 100644 --- a/regexec.c +++ b/regexec.c @@ -4,228 +4,228 @@ /* - * return 0 if no match - * >0 if a match - * <0 if we ran out of _relist space + * return 0 if no match + * >0 if a match + * <0 if we ran out of _relist space */ static int -regexec1(Reprog *progp, /* program to run */ - char *bol, /* string to run machine on */ - Resub *mp, /* subexpression elements */ - int ms, /* number of elements at mp */ - Reljunk *j +regexec1(Reprog *progp, /* program to run */ + char *bol, /* string to run machine on */ + Resub *mp, /* subexpression elements */ + int ms, /* number of elements at mp */ + Reljunk *j ) { - int flag=0; - Reinst *inst; - Relist *tlp; - char *s; - int i, checkstart; - Rune r, *rp, *ep; - int n; - Relist* tl; /* This list, next list */ - Relist* nl; - Relist* tle; /* ends of this and next list */ - Relist* nle; - int match; - char *p; + int flag=0; + Reinst *inst; + Relist *tlp; + char *s; + int i, checkstart; + Rune r, *rp, *ep; + int n; + Relist* tl; /* This list, next list */ + Relist* nl; + Relist* tle; /* ends of this and next list */ + Relist* nle; + int match; + char *p; - match = 0; - checkstart = j->starttype; - if(mp) - for(i=0; irelist[0][0].inst = 0; - j->relist[1][0].inst = 0; + match = 0; + checkstart = j->starttype; + if(mp) + for(i=0; irelist[0][0].inst = 0; + j->relist[1][0].inst = 0; - /* Execute machine once for each character, including terminal NUL */ - s = j->starts; - do{ - /* fast check for first char */ - if(checkstart) { - switch(j->starttype) { - case RUNE: - p = utfrune(s, j->startchar); - if(p == 0 || s == j->eol) - return match; - s = p; - break; - case BOL: - if(s == bol) - break; - p = utfrune(s, '\n'); - if(p == 0 || s == j->eol) - return match; - s = p+1; - break; - } - } - r = *(unsigned char*)s; - if(r < Runeself) - n = 1; - else - n = chartorune(&r, s); + /* Execute machine once for each character, including terminal NUL */ + s = j->starts; + do{ + /* fast check for first char */ + if(checkstart) { + switch(j->starttype) { + case RUNE: + p = utfrune(s, j->startchar); + if(p == 0 || s == j->eol) + return match; + s = p; + break; + case BOL: + if(s == bol) + break; + p = utfrune(s, '\n'); + if(p == 0 || s == j->eol) + return match; + s = p+1; + break; + } + } + r = *(unsigned char*)s; + if(r < Runeself) + n = 1; + else + n = chartorune(&r, s); - /* switch run lists */ - tl = j->relist[flag]; - tle = j->reliste[flag]; - nl = j->relist[flag^=1]; - nle = j->reliste[flag]; - nl->inst = 0; + /* switch run lists */ + tl = j->relist[flag]; + tle = j->reliste[flag]; + nl = j->relist[flag^=1]; + nle = j->reliste[flag]; + nl->inst = 0; - /* Add first instruction to current list */ - if(match == 0) - _renewemptythread(tl, progp->startinst, ms, s); + /* Add first instruction to current list */ + if(match == 0) + _renewemptythread(tl, progp->startinst, ms, s); - /* Execute machine until current list is empty */ - for(tlp=tl; tlp->inst; tlp++){ /* assignment = */ - for(inst = tlp->inst; inst; inst = inst->l.next){ - switch(inst->type){ - case RUNE: /* regular character */ - if(inst->r.r == r){ - if(_renewthread(nl, inst->l.next, ms, &tlp->se)==nle) - return -1; - } - break; - case LBRA: - tlp->se.m[inst->r.subid].s.sp = s; - continue; - case RBRA: - tlp->se.m[inst->r.subid].e.ep = s; - continue; - case ANY: - if(r != '\n') - if(_renewthread(nl, inst->l.next, ms, &tlp->se)==nle) - return -1; - break; - case ANYNL: - if(_renewthread(nl, inst->l.next, ms, &tlp->se)==nle) - return -1; - break; - case BOL: - if(s == bol || *(s-1) == '\n') - continue; - break; - case EOL: - if(s == j->eol || r == 0 || r == '\n') - continue; - break; - case CCLASS: - ep = inst->r.cp->end; - for(rp = inst->r.cp->spans; rp < ep; rp += 2) - if(r >= rp[0] && r <= rp[1]){ - if(_renewthread(nl, inst->l.next, ms, &tlp->se)==nle) - return -1; - break; - } - break; - case NCCLASS: - ep = inst->r.cp->end; - for(rp = inst->r.cp->spans; rp < ep; rp += 2) - if(r >= rp[0] && r <= rp[1]) - break; - if(rp == ep) - if(_renewthread(nl, inst->l.next, ms, &tlp->se)==nle) - return -1; - break; - case OR: - /* evaluate right choice later */ - if(_renewthread(tlp, inst->r.right, ms, &tlp->se) == tle) - return -1; - /* efficiency: advance and re-evaluate */ - continue; - case END: /* Match! */ - match = 1; - tlp->se.m[0].e.ep = s; - if(mp != 0) - _renewmatch(mp, ms, &tlp->se); - break; - } - break; - } - } - if(s == j->eol) - break; - checkstart = j->starttype && nl->inst==0; - s += n; - }while(r); - return match; + /* Execute machine until current list is empty */ + for(tlp=tl; tlp->inst; tlp++){ /* assignment = */ + for(inst = tlp->inst; inst; inst = inst->l.next){ + switch(inst->type){ + case RUNE: /* regular character */ + if(inst->r.r == r){ + if(_renewthread(nl, inst->l.next, ms, &tlp->se)==nle) + return -1; + } + break; + case LBRA: + tlp->se.m[inst->r.subid].s.sp = s; + continue; + case RBRA: + tlp->se.m[inst->r.subid].e.ep = s; + continue; + case ANY: + if(r != '\n') + if(_renewthread(nl, inst->l.next, ms, &tlp->se)==nle) + return -1; + break; + case ANYNL: + if(_renewthread(nl, inst->l.next, ms, &tlp->se)==nle) + return -1; + break; + case BOL: + if(s == bol || *(s-1) == '\n') + continue; + break; + case EOL: + if(s == j->eol || r == 0 || r == '\n') + continue; + break; + case CCLASS: + ep = inst->r.cp->end; + for(rp = inst->r.cp->spans; rp < ep; rp += 2) + if(r >= rp[0] && r <= rp[1]){ + if(_renewthread(nl, inst->l.next, ms, &tlp->se)==nle) + return -1; + break; + } + break; + case NCCLASS: + ep = inst->r.cp->end; + for(rp = inst->r.cp->spans; rp < ep; rp += 2) + if(r >= rp[0] && r <= rp[1]) + break; + if(rp == ep) + if(_renewthread(nl, inst->l.next, ms, &tlp->se)==nle) + return -1; + break; + case OR: + /* evaluate right choice later */ + if(_renewthread(tlp, inst->r.right, ms, &tlp->se) == tle) + return -1; + /* efficiency: advance and re-evaluate */ + continue; + case END: /* Match! */ + match = 1; + tlp->se.m[0].e.ep = s; + if(mp != 0) + _renewmatch(mp, ms, &tlp->se); + break; + } + break; + } + } + if(s == j->eol) + break; + checkstart = j->starttype && nl->inst==0; + s += n; + }while(r); + return match; } static int -regexec2(Reprog *progp, /* program to run */ - char *bol, /* string to run machine on */ - Resub *mp, /* subexpression elements */ - int ms, /* number of elements at mp */ - Reljunk *j +regexec2(Reprog *progp, /* program to run */ + char *bol, /* string to run machine on */ + Resub *mp, /* subexpression elements */ + int ms, /* number of elements at mp */ + Reljunk *j ) { - int rv; - Relist *relist0, *relist1; + int rv; + Relist *relist0, *relist1; - /* mark space */ - relist0 = malloc(BIGLISTSIZE*sizeof(Relist)); - if(relist0 == NULL) - return -1; - relist1 = malloc(BIGLISTSIZE*sizeof(Relist)); - if(relist1 == NULL){ - free(relist0); - return -1; - } - j->relist[0] = relist0; - j->relist[1] = relist1; - j->reliste[0] = relist0 + BIGLISTSIZE - 2; - j->reliste[1] = relist1 + BIGLISTSIZE - 2; + /* mark space */ + relist0 = malloc(BIGLISTSIZE*sizeof(Relist)); + if(relist0 == NULL) + return -1; + relist1 = malloc(BIGLISTSIZE*sizeof(Relist)); + if(relist1 == NULL){ + free(relist0); + return -1; + } + j->relist[0] = relist0; + j->relist[1] = relist1; + j->reliste[0] = relist0 + BIGLISTSIZE - 2; + j->reliste[1] = relist1 + BIGLISTSIZE - 2; - rv = regexec1(progp, bol, mp, ms, j); - free(relist0); - free(relist1); - return rv; + rv = regexec1(progp, bol, mp, ms, j); + free(relist0); + free(relist1); + return rv; } extern int -regexec(Reprog *progp, /* program to run */ - char *bol, /* string to run machine on */ - Resub *mp, /* subexpression elements */ - int ms) /* number of elements at mp */ +regexec(Reprog *progp, /* program to run */ + char *bol, /* string to run machine on */ + Resub *mp, /* subexpression elements */ + int ms) /* number of elements at mp */ { - Reljunk j; - Relist relist0[LISTSIZE], relist1[LISTSIZE]; - int rv; + Reljunk j; + Relist relist0[LISTSIZE], relist1[LISTSIZE]; + int rv; - /* - * use user-specified starting/ending location if specified - */ - j.starts = bol; - j.eol = 0; - if(mp && ms>0){ - if(mp->s.sp) - j.starts = mp->s.sp; - if(mp->e.ep) - j.eol = mp->e.ep; - } - j.starttype = 0; - j.startchar = 0; - if(progp->startinst->type == RUNE && progp->startinst->r.r < Runeself) { - j.starttype = RUNE; - j.startchar = progp->startinst->r.r; - } - if(progp->startinst->type == BOL) - j.starttype = BOL; + /* + * use user-specified starting/ending location if specified + */ + j.starts = bol; + j.eol = 0; + if(mp && ms>0){ + if(mp->s.sp) + j.starts = mp->s.sp; + if(mp->e.ep) + j.eol = mp->e.ep; + } + j.starttype = 0; + j.startchar = 0; + if(progp->startinst->type == RUNE && progp->startinst->r.r < Runeself) { + j.starttype = RUNE; + j.startchar = progp->startinst->r.r; + } + if(progp->startinst->type == BOL) + j.starttype = BOL; - /* mark space */ - j.relist[0] = relist0; - j.relist[1] = relist1; - j.reliste[0] = relist0 + nelem(relist0) - 2; - j.reliste[1] = relist1 + nelem(relist1) - 2; + /* mark space */ + j.relist[0] = relist0; + j.relist[1] = relist1; + j.reliste[0] = relist0 + nelem(relist0) - 2; + j.reliste[1] = relist1 + nelem(relist1) - 2; - rv = regexec1(progp, bol, mp, ms, &j); - if(rv >= 0) - return rv; - rv = regexec2(progp, bol, mp, ms, &j); - if(rv >= 0) - return rv; - return -1; + rv = regexec1(progp, bol, mp, ms, &j); + if(rv >= 0) + return rv; + rv = regexec2(progp, bol, mp, ms, &j); + if(rv >= 0) + return rv; + return -1; } diff --git a/regexp9.h b/regexp9.h index b85cebb..962f693 100644 --- a/regexp9.h +++ b/regexp9.h @@ -1,79 +1,69 @@ #ifndef _REGEXP9_H_ #define _REGEXP9_H_ 1 -#if defined(__cplusplus) -extern "C" { -#endif - -#ifdef AUTOLIB -AUTOLIB(regexp9) -#endif #include "utf.h" -typedef struct Resub Resub; -typedef struct Reclass Reclass; -typedef struct Reinst Reinst; -typedef struct Reprog Reprog; +typedef struct Reinst Reinst; /* - * Sub expression matches + * Sub expression matches */ -struct Resub{ - union - { - char *sp; - Rune *rsp; - }s; - union - { - char *ep; - Rune *rep; - }e; -}; +typedef struct Resub { + union + { + char *sp; + Rune *rsp; + }s; + union + { + char *ep; + Rune *rep; + }e; +} Resub; /* - * character class, each pair of rune's defines a range + * character class, each pair of rune's defines a range */ -struct Reclass{ - Rune *end; - Rune spans[64]; -}; +typedef struct Reclass { + Rune *end; + Rune spans[64]; +} Reclass; /* - * Machine instructions + * Machine instructions */ -struct Reinst{ - int type; - union { - Reclass *cp; /* class pointer */ - Rune r; /* character */ - int subid; /* sub-expression id for RBRA and LBRA */ - Reinst *right; /* right child of OR */ - }r; - union { /* regexp relies on these two being in the same union */ - Reinst *left; /* left child of OR */ - Reinst *next; /* next instruction for CAT & LBRA */ - }l; +struct Reinst { + int type; + union { + Reclass *cp; /* class pointer */ + Rune r; /* character */ + int subid; /* sub-expression id for RBRA and LBRA */ + Reinst *right; /* right child of OR */ + }r; + union { /* regexp relies on these two being in the same union */ + Reinst *left; /* left child of OR */ + Reinst *next; /* next instruction for CAT & LBRA */ + }l; }; /* - * Reprogram definition + * Reprogram definition */ -struct Reprog{ - Reinst *startinst; /* start pc */ - Reclass class[16]; /* .data */ - Reinst firstinst[5]; /* .text */ -}; +typedef struct Reprog { + Reinst *startinst; /* start pc */ + Reclass class[16]; /* .data */ + Reinst firstinst[5]; /* .text */ +} Reprog; -extern Reprog *regcomp9(char*); -extern Reprog *regcomplit9(char*); -extern Reprog *regcompnl9(char*); -extern void regerror9(char*); -extern int regexec9(Reprog*, char*, Resub*, int); -extern void regsub9(char*, char*, int, Resub*, int); +extern Reprog *regcomp9(char*); +extern Reprog *regcomplit9(char*); +extern Reprog *regcompnl9(char*); +extern void regerror9(char*); +extern int regexec9(Reprog*, char*, Resub*, int); +extern void regsub9(char*, char*, int, Resub*, int); -extern int rregexec9(Reprog*, Rune*, Resub*, int); -extern void rregsub9(Rune*, Rune*, int, Resub*, int); +extern int rregexec9(Reprog*, Rune*, Resub*, int); +extern void rregsub9(Rune*, Rune*, int, Resub*, int); /* * Darwin simply cannot handle having routines that @@ -90,7 +80,4 @@ extern void rregsub9(Rune*, Rune*, int, Resub*, int); #define rregsub rregsub9 #endif -#if defined(__cplusplus) -} -#endif #endif diff --git a/regsub.c b/regsub.c index 4e94f6e..b33f256 100644 --- a/regsub.c +++ b/regsub.c @@ -1,58 +1,58 @@ #include "regexp9.h" /* substitute into one string using the matches from the last regexec() */ -extern void -regsub(char *sp, /* source string */ - char *dp, /* destination string */ - int dlen, - Resub *mp, /* subexpression elements */ - int ms) /* number of elements pointed to by mp */ +extern void +regsub(char *sp, /* source string */ + char *dp, /* destination string */ + int dlen, + Resub *mp, /* subexpression elements */ + int ms) /* number of elements pointed to by mp */ { - char *ssp, *ep; - int i; + char *ssp, *ep; + int i; - ep = dp+dlen-1; - while(*sp != '\0'){ - if(*sp == '\\'){ - switch(*++sp){ - case '0': - case '1': - case '2': - case '3': - case '4': - case '5': - case '6': - case '7': - case '8': - case '9': - i = *sp-'0'; - if(mp!=0 && mp[i].s.sp != 0 && ms>i) - for(ssp = mp[i].s.sp; ssp < mp[i].e.ep; ssp++) - if(dp < ep) - *dp++ = *ssp; - break; - case '\\': - if(dp < ep) - *dp++ = '\\'; - break; - case '\0': - sp--; - break; - default: - if(dp < ep) - *dp++ = *sp; - break; - } - }else if(*sp == '&'){ - if(mp!=0 && mp[0].s.sp != 0 && ms>0) - for(ssp = mp[0].s.sp; ssp < mp[0].e.ep; ssp++) - if(dp < ep) - *dp++ = *ssp; - }else{ - if(dp < ep) - *dp++ = *sp; - } - sp++; - } - *dp = '\0'; + ep = dp+dlen-1; + while(*sp != '\0'){ + if(*sp == '\\'){ + switch(*++sp){ + case '0': + case '1': + case '2': + case '3': + case '4': + case '5': + case '6': + case '7': + case '8': + case '9': + i = *sp-'0'; + if(mp!=0 && mp[i].s.sp != 0 && ms>i) + for(ssp = mp[i].s.sp; ssp < mp[i].e.ep; ssp++) + if(dp < ep) + *dp++ = *ssp; + break; + case '\\': + if(dp < ep) + *dp++ = '\\'; + break; + case '\0': + sp--; + break; + default: + if(dp < ep) + *dp++ = *sp; + break; + } + }else if(*sp == '&'){ + if(mp!=0 && mp[0].s.sp != 0 && ms>0) + for(ssp = mp[0].s.sp; ssp < mp[0].e.ep; ssp++) + if(dp < ep) + *dp++ = *ssp; + }else{ + if(dp < ep) + *dp++ = *sp; + } + sp++; + } + *dp = '\0'; } diff --git a/rregexec.c b/rregexec.c index 8af16b6..7ced021 100644 --- a/rregexec.c +++ b/rregexec.c @@ -2,209 +2,209 @@ #include "regcomp.h" /* - * return 0 if no match - * >0 if a match - * <0 if we ran out of _relist space + * return 0 if no match + * >0 if a match + * <0 if we ran out of _relist space */ static int -rregexec1(Reprog *progp, /* program to run */ - Rune *bol, /* string to run machine on */ - Resub *mp, /* subexpression elements */ - int ms, /* number of elements at mp */ - Reljunk *j) +rregexec1(Reprog *progp, /* program to run */ + Rune *bol, /* string to run machine on */ + Resub *mp, /* subexpression elements */ + int ms, /* number of elements at mp */ + Reljunk *j) { - int flag=0; - Reinst *inst; - Relist *tlp; - Rune *s; - int i, checkstart; - Rune r, *rp, *ep; - Relist* tl; /* This list, next list */ - Relist* nl; - Relist* tle; /* ends of this and next list */ - Relist* nle; - int match; - Rune *p; + int flag=0; + Reinst *inst; + Relist *tlp; + Rune *s; + int i, checkstart; + Rune r, *rp, *ep; + Relist* tl; /* This list, next list */ + Relist* nl; + Relist* tle; /* ends of this and next list */ + Relist* nle; + int match; + Rune *p; - match = 0; - checkstart = j->startchar; - if(mp) - for(i=0; irelist[0][0].inst = 0; - j->relist[1][0].inst = 0; + match = 0; + checkstart = j->startchar; + if(mp) + for(i=0; irelist[0][0].inst = 0; + j->relist[1][0].inst = 0; - /* Execute machine once for each character, including terminal NUL */ - s = j->rstarts; - do{ - /* fast check for first char */ - if(checkstart) { - switch(j->starttype) { - case RUNE: - p = runestrchr(s, j->startchar); - if(p == 0 || s == j->reol) - return match; - s = p; - break; - case BOL: - if(s == bol) - break; - p = runestrchr(s, '\n'); - if(p == 0 || s == j->reol) - return match; - s = p+1; - break; - } - } + /* Execute machine once for each character, including terminal NUL */ + s = j->rstarts; + do{ + /* fast check for first char */ + if(checkstart) { + switch(j->starttype) { + case RUNE: + p = runestrchr(s, j->startchar); + if(p == 0 || s == j->reol) + return match; + s = p; + break; + case BOL: + if(s == bol) + break; + p = runestrchr(s, '\n'); + if(p == 0 || s == j->reol) + return match; + s = p+1; + break; + } + } - r = *s; + r = *s; - /* switch run lists */ - tl = j->relist[flag]; - tle = j->reliste[flag]; - nl = j->relist[flag^=1]; - nle = j->reliste[flag]; - nl->inst = 0; + /* switch run lists */ + tl = j->relist[flag]; + tle = j->reliste[flag]; + nl = j->relist[flag^=1]; + nle = j->reliste[flag]; + nl->inst = 0; - /* Add first instruction to current list */ - _rrenewemptythread(tl, progp->startinst, ms, s); + /* Add first instruction to current list */ + _rrenewemptythread(tl, progp->startinst, ms, s); - /* Execute machine until current list is empty */ - for(tlp=tl; tlp->inst; tlp++){ - for(inst=tlp->inst; ; inst = inst->l.next){ - switch(inst->type){ - case RUNE: /* regular character */ - if(inst->r.r == r) - if(_renewthread(nl, inst->l.next, ms, &tlp->se)==nle) - return -1; - break; - case LBRA: - tlp->se.m[inst->r.subid].s.rsp = s; - continue; - case RBRA: - tlp->se.m[inst->r.subid].e.rep = s; - continue; - case ANY: - if(r != '\n') - if(_renewthread(nl, inst->l.next, ms, &tlp->se)==nle) - return -1; - break; - case ANYNL: - if(_renewthread(nl, inst->l.next, ms, &tlp->se)==nle) - return -1; - break; - case BOL: - if(s == bol || *(s-1) == '\n') - continue; - break; - case EOL: - if(s == j->reol || r == 0 || r == '\n') - continue; - break; - case CCLASS: - ep = inst->r.cp->end; - for(rp = inst->r.cp->spans; rp < ep; rp += 2) - if(r >= rp[0] && r <= rp[1]){ - if(_renewthread(nl, inst->l.next, ms, &tlp->se)==nle) - return -1; - break; - } - break; - case NCCLASS: - ep = inst->r.cp->end; - for(rp = inst->r.cp->spans; rp < ep; rp += 2) - if(r >= rp[0] && r <= rp[1]) - break; - if(rp == ep) - if(_renewthread(nl, inst->l.next, ms, &tlp->se)==nle) - return -1; - break; - case OR: - /* evaluate right choice later */ - if(_renewthread(tlp, inst->r.right, ms, &tlp->se) == tle) - return -1; - /* efficiency: advance and re-evaluate */ - continue; - case END: /* Match! */ - match = 1; - tlp->se.m[0].e.rep = s; - if(mp != 0) - _renewmatch(mp, ms, &tlp->se); - break; - } - break; - } - } - if(s == j->reol) - break; - checkstart = j->startchar && nl->inst==0; - s++; - }while(r); - return match; + /* Execute machine until current list is empty */ + for(tlp=tl; tlp->inst; tlp++){ + for(inst=tlp->inst; ; inst = inst->l.next){ + switch(inst->type){ + case RUNE: /* regular character */ + if(inst->r.r == r) + if(_renewthread(nl, inst->l.next, ms, &tlp->se)==nle) + return -1; + break; + case LBRA: + tlp->se.m[inst->r.subid].s.rsp = s; + continue; + case RBRA: + tlp->se.m[inst->r.subid].e.rep = s; + continue; + case ANY: + if(r != '\n') + if(_renewthread(nl, inst->l.next, ms, &tlp->se)==nle) + return -1; + break; + case ANYNL: + if(_renewthread(nl, inst->l.next, ms, &tlp->se)==nle) + return -1; + break; + case BOL: + if(s == bol || *(s-1) == '\n') + continue; + break; + case EOL: + if(s == j->reol || r == 0 || r == '\n') + continue; + break; + case CCLASS: + ep = inst->r.cp->end; + for(rp = inst->r.cp->spans; rp < ep; rp += 2) + if(r >= rp[0] && r <= rp[1]){ + if(_renewthread(nl, inst->l.next, ms, &tlp->se)==nle) + return -1; + break; + } + break; + case NCCLASS: + ep = inst->r.cp->end; + for(rp = inst->r.cp->spans; rp < ep; rp += 2) + if(r >= rp[0] && r <= rp[1]) + break; + if(rp == ep) + if(_renewthread(nl, inst->l.next, ms, &tlp->se)==nle) + return -1; + break; + case OR: + /* evaluate right choice later */ + if(_renewthread(tlp, inst->r.right, ms, &tlp->se) == tle) + return -1; + /* efficiency: advance and re-evaluate */ + continue; + case END: /* Match! */ + match = 1; + tlp->se.m[0].e.rep = s; + if(mp != 0) + _renewmatch(mp, ms, &tlp->se); + break; + } + break; + } + } + if(s == j->reol) + break; + checkstart = j->startchar && nl->inst==0; + s++; + }while(r); + return match; } static int -rregexec2(Reprog *progp, /* program to run */ - Rune *bol, /* string to run machine on */ - Resub *mp, /* subexpression elements */ - int ms, /* number of elements at mp */ - Reljunk *j +rregexec2(Reprog *progp, /* program to run */ + Rune *bol, /* string to run machine on */ + Resub *mp, /* subexpression elements */ + int ms, /* number of elements at mp */ + Reljunk *j ) { - Relist relist0[5*LISTSIZE], relist1[5*LISTSIZE]; + Relist relist0[5*LISTSIZE], relist1[5*LISTSIZE]; - /* mark space */ - j->relist[0] = relist0; - j->relist[1] = relist1; - j->reliste[0] = relist0 + nelem(relist0) - 2; - j->reliste[1] = relist1 + nelem(relist1) - 2; + /* mark space */ + j->relist[0] = relist0; + j->relist[1] = relist1; + j->reliste[0] = relist0 + nelem(relist0) - 2; + j->reliste[1] = relist1 + nelem(relist1) - 2; - return rregexec1(progp, bol, mp, ms, j); + return rregexec1(progp, bol, mp, ms, j); } extern int -rregexec(Reprog *progp, /* program to run */ - Rune *bol, /* string to run machine on */ - Resub *mp, /* subexpression elements */ - int ms) /* number of elements at mp */ +rregexec(Reprog *progp, /* program to run */ + Rune *bol, /* string to run machine on */ + Resub *mp, /* subexpression elements */ + int ms) /* number of elements at mp */ { - Reljunk j; - Relist relist0[LISTSIZE], relist1[LISTSIZE]; - int rv; + Reljunk j; + Relist relist0[LISTSIZE], relist1[LISTSIZE]; + int rv; - /* - * use user-specified starting/ending location if specified - */ - j.rstarts = bol; - j.reol = 0; - if(mp && ms>0){ - if(mp->s.sp) - j.rstarts = mp->s.rsp; - if(mp->e.ep) - j.reol = mp->e.rep; - } - j.starttype = 0; - j.startchar = 0; - if(progp->startinst->type == RUNE && progp->startinst->r.r < Runeself) { - j.starttype = RUNE; - j.startchar = progp->startinst->r.r; - } - if(progp->startinst->type == BOL) - j.starttype = BOL; + /* + * use user-specified starting/ending location if specified + */ + j.rstarts = bol; + j.reol = 0; + if(mp && ms>0){ + if(mp->s.sp) + j.rstarts = mp->s.rsp; + if(mp->e.ep) + j.reol = mp->e.rep; + } + j.starttype = 0; + j.startchar = 0; + if(progp->startinst->type == RUNE && progp->startinst->r.r < Runeself) { + j.starttype = RUNE; + j.startchar = progp->startinst->r.r; + } + if(progp->startinst->type == BOL) + j.starttype = BOL; - /* mark space */ - j.relist[0] = relist0; - j.relist[1] = relist1; - j.reliste[0] = relist0 + nelem(relist0) - 2; - j.reliste[1] = relist1 + nelem(relist1) - 2; + /* mark space */ + j.relist[0] = relist0; + j.relist[1] = relist1; + j.reliste[0] = relist0 + nelem(relist0) - 2; + j.reliste[1] = relist1 + nelem(relist1) - 2; - rv = rregexec1(progp, bol, mp, ms, &j); - if(rv >= 0) - return rv; - rv = rregexec2(progp, bol, mp, ms, &j); - if(rv >= 0) - return rv; - return -1; + rv = rregexec1(progp, bol, mp, ms, &j); + if(rv >= 0) + return rv; + rv = rregexec2(progp, bol, mp, ms, &j); + if(rv >= 0) + return rv; + return -1; } diff --git a/rregsub.c b/rregsub.c index 0b47799..7e2198c 100644 --- a/rregsub.c +++ b/rregsub.c @@ -1,61 +1,61 @@ #include "regexp9.h" /* substitute into one string using the matches from the last regexec() */ -extern void -rregsub(Rune *sp, /* source string */ - Rune *dp, /* destination string */ - int dlen, - Resub *mp, /* subexpression elements */ - int ms) /* number of elements pointed to by mp */ +extern void +rregsub(Rune *sp, /* source string */ + Rune *dp, /* destination string */ + int dlen, + Resub *mp, /* subexpression elements */ + int ms) /* number of elements pointed to by mp */ { - Rune *ssp, *ep; - int i; + Rune *ssp, *ep; + int i; - ep = dp+(dlen/sizeof(Rune))-1; - while(*sp != '\0'){ - if(*sp == '\\'){ - switch(*++sp){ - case '0': - case '1': - case '2': - case '3': - case '4': - case '5': - case '6': - case '7': - case '8': - case '9': - i = *sp-'0'; - if(mp!=0 && mp[i].s.rsp != 0 && ms>i) - for(ssp = mp[i].s.rsp; - ssp < mp[i].e.rep; - ssp++) - if(dp < ep) - *dp++ = *ssp; - break; - case '\\': - if(dp < ep) - *dp++ = '\\'; - break; - case '\0': - sp--; - break; - default: - if(dp < ep) - *dp++ = *sp; - break; - } - }else if(*sp == '&'){ - if(mp!=0 && mp[0].s.rsp != 0 && ms>0) - for(ssp = mp[0].s.rsp; - ssp < mp[0].e.rep; ssp++) - if(dp < ep) - *dp++ = *ssp; - }else{ - if(dp < ep) - *dp++ = *sp; - } - sp++; - } - *dp = '\0'; + ep = dp+(dlen/sizeof(Rune))-1; + while(*sp != '\0'){ + if(*sp == '\\'){ + switch(*++sp){ + case '0': + case '1': + case '2': + case '3': + case '4': + case '5': + case '6': + case '7': + case '8': + case '9': + i = *sp-'0'; + if(mp!=0 && mp[i].s.rsp != 0 && ms>i) + for(ssp = mp[i].s.rsp; + ssp < mp[i].e.rep; + ssp++) + if(dp < ep) + *dp++ = *ssp; + break; + case '\\': + if(dp < ep) + *dp++ = '\\'; + break; + case '\0': + sp--; + break; + default: + if(dp < ep) + *dp++ = *sp; + break; + } + }else if(*sp == '&'){ + if(mp!=0 && mp[0].s.rsp != 0 && ms>0) + for(ssp = mp[0].s.rsp; + ssp < mp[0].e.rep; ssp++) + if(dp < ep) + *dp++ = *ssp; + }else{ + if(dp < ep) + *dp++ = *sp; + } + sp++; + } + *dp = '\0'; } diff --git a/test.c b/test.c index 6119070..11f728a 100644 --- a/test.c +++ b/test.c @@ -5,46 +5,46 @@ struct x { - char *re; - char *s; - Reprog *p; + char *re; + char *s; + Reprog *p; }; struct x t[] = { - { "^[^!@]+$", "/bin/upas/aliasmail '&'", 0 }, - { "^local!(.*)$", "/mail/box/\\1/mbox", 0 }, - { "^plan9!(.*)$", "\\1", 0 }, - { "^helix!(.*)$", "\\1", 0 }, - { "^([^!]+)@([^!@]+)$", "\\2!\\1", 0 }, - { "^(uk\\.[^!]*)(!.*)$", "/bin/upas/uk2uk '\\1' '\\2'", 0 }, - { "^[^!]*\\.[^!]*!.*$", "inet!&", 0 }, - { "^\xE2\x98\xBA$", "smiley", 0 }, - { "^(coma|research|pipe|pyxis|inet|hunny|gauss)!(.*)$", "/mail/lib/qmail '\\s' 'net!\\1' '\\2'", 0 }, - { "^.*$", "/mail/lib/qmail '\\s' 'net!research' '&'", 0 }, - { 0, 0, 0 }, + { "^[^!@]+$", "/bin/upas/aliasmail '&'", 0 }, + { "^local!(.*)$", "/mail/box/\\1/mbox", 0 }, + { "^plan9!(.*)$", "\\1", 0 }, + { "^helix!(.*)$", "\\1", 0 }, + { "^([^!]+)@([^!@]+)$", "\\2!\\1", 0 }, + { "^(uk\\.[^!]*)(!.*)$", "/bin/upas/uk2uk '\\1' '\\2'", 0 }, + { "^[^!]*\\.[^!]*!.*$", "inet!&", 0 }, + { "^\xE2\x98\xBA$", "smiley", 0 }, + { "^(coma|research|pipe|pyxis|inet|hunny|gauss)!(.*)$", "/mail/lib/qmail '\\s' 'net!\\1' '\\2'", 0 }, + { "^.*$", "/mail/lib/qmail '\\s' 'net!research' '&'", 0 }, + { 0, 0, 0 }, }; int main(int ac, char **av) { - Resub rs[10]; - char dst[128]; - struct x *tp; + Resub rs[10]; + char dst[128]; + struct x *tp; - if(ac != 2) - exit(1); + if(ac != 2) + exit(1); - for(tp = t; tp->re; tp++) - tp->p = regcomp(tp->re); + for(tp = t; tp->re; tp++) + tp->p = regcomp(tp->re); - for(tp = t; tp->re; tp++){ - printf("%s VIA %s", av[1], tp->re); - memset(rs, 0, sizeof rs); - if(regexec(tp->p, av[1], rs, 10)){ - regsub(tp->s, dst, sizeof dst, rs, 10); - printf(" sub %s -> %s", tp->s, dst); - } - printf("\n"); - } - exit(0); + for(tp = t; tp->re; tp++){ + printf("%s VIA %s", av[1], tp->re); + memset(rs, 0, sizeof rs); + if(regexec(tp->p, av[1], rs, 10)){ + regsub(tp->s, dst, sizeof dst, rs, 10); + printf(" sub %s -> %s", tp->s, dst); + } + printf("\n"); + } + exit(0); } diff --git a/test2.c b/test2.c index 8fb1deb..dda1805 100644 --- a/test2.c +++ b/test2.c @@ -6,16 +6,16 @@ int main() { - Resub rs[10]; - Reprog *p; - char *s; + Resub rs[10]; + Reprog *p; + char *s; - p = regcomp("[^a-z]"); - s = "\n"; - if(regexec(p, s, rs, 10)) - printf("%s %p %p %p\n", s, s, rs[0].s.sp, rs[0].e.ep); - s = "0"; - if(regexec(p, s, rs, 10)) - printf("%s %p %p %p\n", s, s, rs[0].s.sp, rs[0].e.ep); - exit(0); + p = regcomp("[^a-z]"); + s = "\n"; + if(regexec(p, s, rs, 10)) + printf("%s %p %p %p\n", s, s, rs[0].s.sp, rs[0].e.ep); + s = "0"; + if(regexec(p, s, rs, 10)) + printf("%s %p %p %p\n", s, s, rs[0].s.sp, rs[0].e.ep); + exit(0); } diff --git a/utf.c b/utf.c index 5eb4efb..5a27b68 100644 --- a/utf.c +++ b/utf.c @@ -16,147 +16,147 @@ enum { - Bit1 = 7, - Bitx = 6, - Bit2 = 5, - Bit3 = 4, - Bit4 = 3, - Bit5 = 2, - - T1 = ((1<<(Bit1+1))-1) ^ 0xFF, /* 0000 0000 */ - Tx = ((1<<(Bitx+1))-1) ^ 0xFF, /* 1000 0000 */ - T2 = ((1<<(Bit2+1))-1) ^ 0xFF, /* 1100 0000 */ - T3 = ((1<<(Bit3+1))-1) ^ 0xFF, /* 1110 0000 */ - T4 = ((1<<(Bit4+1))-1) ^ 0xFF, /* 1111 0000 */ - T5 = ((1<<(Bit5+1))-1) ^ 0xFF, /* 1111 1000 */ - - Rune1 = (1<<(Bit1+0*Bitx))-1, /* 0000 0000 0000 0000 0111 1111 */ - Rune2 = (1<<(Bit2+1*Bitx))-1, /* 0000 0000 0000 0111 1111 1111 */ - Rune3 = (1<<(Bit3+2*Bitx))-1, /* 0000 0000 1111 1111 1111 1111 */ - Rune4 = (1<<(Bit4+3*Bitx))-1, /* 0011 1111 1111 1111 1111 1111 */ - - Maskx = (1< T1 - */ - c = *(unsigned char*)str; - if(c < Tx) { - *rune = c; - return 1; - } - - /* - * two character sequence - * 0080-07FF => T2 Tx - */ - c1 = *(unsigned char*)(str+1) ^ Tx; - if(c1 & Testx) - goto bad; - if(c < T3) { - if(c < T2) - goto bad; - l = ((c << Bitx) | c1) & Rune2; - if(l <= Rune1) - goto bad; - *rune = l; - return 2; - } - - /* - * three character sequence - * 0800-FFFF => T3 Tx Tx - */ - c2 = *(unsigned char*)(str+2) ^ Tx; - if(c2 & Testx) - goto bad; - if(c < T4) { - l = ((((c << Bitx) | c1) << Bitx) | c2) & Rune3; - if(l <= Rune2) - goto bad; - *rune = l; - return 3; - } - - /* - * four character sequence - * 10000-10FFFF => T4 Tx Tx Tx - */ - if(UTFmax >= 4) { - c3 = *(unsigned char*)(str+3) ^ Tx; - if(c3 & Testx) - goto bad; - if(c < T5) { - l = ((((((c << Bitx) | c1) << Bitx) | c2) << Bitx) | c3) & Rune4; - if(l <= Rune3) - goto bad; - if(l > Runemax) - goto bad; - *rune = l; - return 4; - } - } - - /* - * bad decoding - */ + int c, c1, c2, c3; + long l; + + /* + * one character sequence + * 00000-0007F => T1 + */ + c = *(unsigned char*)str; + if(c < Tx) { + *rune = c; + return 1; + } + + /* + * two character sequence + * 0080-07FF => T2 Tx + */ + c1 = *(unsigned char*)(str+1) ^ Tx; + if(c1 & Testx) + goto bad; + if(c < T3) { + if(c < T2) + goto bad; + l = ((c << Bitx) | c1) & Rune2; + if(l <= Rune1) + goto bad; + *rune = l; + return 2; + } + + /* + * three character sequence + * 0800-FFFF => T3 Tx Tx + */ + c2 = *(unsigned char*)(str+2) ^ Tx; + if(c2 & Testx) + goto bad; + if(c < T4) { + l = ((((c << Bitx) | c1) << Bitx) | c2) & Rune3; + if(l <= Rune2) + goto bad; + *rune = l; + return 3; + } + + /* + * four character sequence + * 10000-10FFFF => T4 Tx Tx Tx + */ + if(UTFmax >= 4) { + c3 = *(unsigned char*)(str+3) ^ Tx; + if(c3 & Testx) + goto bad; + if(c < T5) { + l = ((((((c << Bitx) | c1) << Bitx) | c2) << Bitx) | c3) & Rune4; + if(l <= Rune3) + goto bad; + if(l > Runemax) + goto bad; + *rune = l; + return 4; + } + } + + /* + * bad decoding + */ bad: - *rune = Bad; - return 1; + *rune = Bad; + return 1; } Rune* runestrchr(Rune *s, Rune c) { - Rune c0 = c; - Rune c1; - - if(c == 0) { - while(*s++) - ; - return s-1; - } - - while((c1 = *s++)) - if(c1 == c0) - return s-1; - return 0; + Rune c0 = c; + Rune c1; + + if(c == 0) { + while(*s++) + ; + return s-1; + } + + while((c1 = *s++)) + if(c1 == c0) + return s-1; + return 0; } char* utfrune(char *s, Rune c) { - Rune c1; - Rune r; - int n; - - if(c < Runesync) /* not part of utf sequence */ - return strchr(s, c); - - for(;;) { - c1 = *(unsigned char*)s; - if(c1 < Runeself) { /* one byte rune */ - if(c1 == 0) - return 0; - if(c1 == c) - return s; - s++; - continue; - } - n = chartorune(&r, s); - if(r == c) - return s; - s += n; - } + Rune c1; + Rune r; + int n; + + if(c < Runesync) /* not part of utf sequence */ + return strchr(s, c); + + for(;;) { + c1 = *(unsigned char*)s; + if(c1 < Runeself) { /* one byte rune */ + if(c1 == 0) + return 0; + if(c1 == c) + return s; + s++; + continue; + } + n = chartorune(&r, s); + if(r == c) + return s; + s += n; + } } -- 2.52.0