From: Michael D. Lowis Date: Tue, 18 Dec 2018 21:18:45 +0000 (-0500) Subject: more refactoring, renaming, etc.. X-Git-Url: https://git.mdlowis.com/?a=commitdiff_plain;h=12ef2f480bfb0370baafdcb1c78587cd356ce29f;p=proto%2Flibregexp.git more refactoring, renaming, etc.. --- diff --git a/Makefile b/Makefile index d2636b2..93ee4a3 100644 --- a/Makefile +++ b/Makefile @@ -1,5 +1,5 @@ CC=gcc -CFLAGS += --std=c99 -pedantic -Wall -Wextra -O3 -c -g +CFLAGS += --std=c99 -pedantic -Wall -Wextra -Werror -O3 -c -g O=o LIB=libregexp9.a diff --git a/regcomp.c b/regcomp.c index cce3a12..96bdc71 100644 --- a/regcomp.c +++ b/regcomp.c @@ -22,39 +22,43 @@ static Reprog *reprog; static char* SrcExpr; /* pointer to next character in source expression */ #define NSTACK 20 -static Node OperandStack[NSTACK]; /* Stack of operands */ -static Node* OperandPtr; /* Pointer to the top of the operand stack */ -static int OperatorStack[NSTACK]; /* Stack of operators */ -static int* OperatorPtr; /* Pointer to the top of the operator stack */ - -static int cursubid; /* id of current subexpression */ -static int subidstack[NSTACK]; /* parallel to OperatorStack */ -static int* subidp; -static bool lastwasand; /* Last token was operand */ +static Node OperandStack[NSTACK]; /* Stack of operands */ +static Node* OperandPtr; /* Pointer to the top of the operand stack */ +static int OperatorStack[NSTACK]; /* Stack of operators */ +static int* OperatorPtr; /* Pointer to the top of the operator stack */ +static int SubExprStack[NSTACK]; /* parallel to OperatorStack */ +static int* SubExprPtr; /* Pointer to the top of the sub-expression stack */ + +static int CurrSubExpr; /* id of current subexpression */ +static bool LastWasOperand; /* Last token was operand */ + static int nparens; -static int lexdone; static unsigned int nclass; -static Reclass*classp; +static Reclass* classp; static Reinst* freep; -static int errors; static int yyrune; /* last lex'd rune */ static Reclass* yyclassp; /* last lex'd class */ /* predeclared crap */ +static Reinst* OpCode(int t); +static int NextRune(int* p_rune); +static int GetToken(int literal, int dot_type); +static void CompileError(char *s); static void PushOperand(Reinst*, Reinst*); static void PushOperator(int); -static Node* PopOperand(int op); +static Node* PopOperand(void); static int PopOperator(void); static int BuildClass(void); - -static void operator(int); -static void evaluntil(int); +static void Operator(int); +static void EvalUntil(int); +static Reprog* Optimize(Reprog *pp); +static Reprog* Compile(char *s, int literal, int dot_type); static jmp_buf regkaboom; /******************************************************************************/ -static Reinst* newinst(int t) { +static Reinst* OpCode(int t) { freep->type = t; freep->l.left = 0; freep->r.right = 0; @@ -64,16 +68,15 @@ static Reinst* newinst(int t) { /******************************************************************************/ static int NextRune(int* p_rune) { - if (lexdone) { + if (!*SrcExpr) { *p_rune = 0; return 1; } SrcExpr += chartorune(p_rune, SrcExpr); - if(*p_rune == '\\'){ + if (*p_rune == '\\') { SrcExpr += chartorune(p_rune, SrcExpr); return 1; } - lexdone = (*p_rune == 0); return 0; } @@ -103,61 +106,38 @@ static int GetToken(int literal, int dot_type) { /******************************************************************************/ -static void rcerror(char *s) { - errors++; +static void CompileError(char *s) { regerror(s); longjmp(regkaboom, 1); } -static void regerr2(char *s, int c) { - char buf[100]; - char *cp = buf; - while(*s) - *cp++ = *s++; - *cp++ = c; - *cp = '\0'; - rcerror(buf); -} - -static void cant(char *s) { - char buf[100]; - strncpy(buf, "can't happen: ", sizeof(buf)); - strncat(buf, s, sizeof(buf)-1); - rcerror(buf); -} - /******************************************************************************/ static void PushOperand(Reinst *f, Reinst *l) { - if(OperandPtr >= &OperandStack[NSTACK]) - cant("operand stack overflow"); + if (OperandPtr >= &OperandStack[NSTACK]) + CompileError("operand stack overflow"); OperandPtr->first = f; OperandPtr->last = l; OperandPtr++; } static void PushOperator(int t) { - if(OperatorPtr >= &OperatorStack[NSTACK]) - cant("operator stack overflow"); + if (OperatorPtr >= &OperatorStack[NSTACK]) + CompileError("operator stack overflow"); *OperatorPtr++ = t; - *subidp++ = cursubid; + *SubExprPtr++ = CurrSubExpr; } -static Node* PopOperand(int op) { - Reinst *inst; - - if(OperandPtr <= &OperandStack[0]){ - regerr2("missing operand for ", op); - inst = newinst(NOP); - PushOperand(inst,inst); - } +static Node* PopOperand(void) { + if (OperandPtr <= &OperandStack[0]) + CompileError("missing operand for operator"); return --OperandPtr; } static int PopOperator(void) { - if(OperatorPtr <= &OperatorStack[0]) - cant("operator stack underflow"); - --subidp; + if (OperatorPtr <= &OperatorStack[0]) + CompileError("operator stack underflow"); + --SubExprPtr; return *--OperatorPtr; } @@ -172,7 +152,7 @@ static int BuildClass(void) { /* we have already seen the '[' */ if (nclass >= nelem(reprog->class)) - rcerror("too many character classes; increase Reprog.class size"); + CompileError("too many character classes; increase Reprog.class size"); type = CCLASS; yyclassp = &(classp[nclass++]); @@ -190,19 +170,19 @@ static int BuildClass(void) { /* parse class into a set of spans */ while(ep < &r[NCCRUNE-1]){ if(rune == 0){ - rcerror("malformed '[]'"); + CompileError("malformed '[]'"); return 0; } if(!quoted && rune == ']') break; if(!quoted && rune == '-'){ if(ep == r){ - rcerror("malformed '[]'"); + CompileError("malformed '[]'"); return 0; } quoted = NextRune(&rune); if((!quoted && rune == ']') || rune == 0){ - rcerror("malformed '[]'"); + CompileError("malformed '[]'"); return 0; } *(ep-1) = rune; @@ -213,7 +193,7 @@ static int BuildClass(void) { quoted = NextRune(&rune); } if(ep >= &r[NCCRUNE-1]) { - rcerror("char class too large; increase Reclass.spans size"); + CompileError("char class too large; increase Reclass.spans size"); return 0; } @@ -257,94 +237,94 @@ static int BuildClass(void) { /******************************************************************************/ -static void operand(int t) { +static void Operand(int t) { Reinst *i; - if(lastwasand) - operator(CAT); /* catenate is implicit */ - i = newinst(t); + if (LastWasOperand) + Operator(CAT); /* catenate is implicit */ + i = OpCode(t); - if(t == CCLASS || t == NCCLASS) + if (t == CCLASS || t == NCCLASS) i->r.cp = yyclassp; - if(t == RUNE) + if (t == RUNE) i->r.r = yyrune; PushOperand(i, i); - lastwasand = true; + LastWasOperand = true; } -static void operator(int t) { +static void Operator(int t) { if(t==RPAREN && --nparens<0) - rcerror("unmatched right paren"); + CompileError("unmatched right paren"); if(t==LPAREN){ - if(++cursubid >= NSUBEXP) - rcerror("too many subexpressions"); + if(++CurrSubExpr >= NSUBEXP) + CompileError("too many subexpressions"); nparens++; - if(lastwasand) - operator(CAT); + if(LastWasOperand) + Operator(CAT); }else - evaluntil(t); + EvalUntil(t); if(t != RPAREN) PushOperator(t); - lastwasand = false; + LastWasOperand = false; if(t==STAR || t==QUEST || t==PLUS || t==RPAREN) - lastwasand = true; /* these look like operands */ + LastWasOperand = true; /* these look like operands */ } -static void evaluntil(int pri) { +static void EvalUntil(int pri) { Node *op1, *op2; Reinst *inst1, *inst2; while(pri==RPAREN || OperatorPtr[-1]>=pri){ switch(PopOperator()){ default: - rcerror("unknown operator in evaluntil"); + CompileError("unknown operator in evaluntil"); break; case LPAREN: /* must have been RPAREN */ - op1 = PopOperand('('); - inst2 = newinst(RPAREN); - inst2->r.subid = *subidp; + op1 = PopOperand(); + inst2 = OpCode(RPAREN); + inst2->r.subid = *SubExprPtr; op1->last->l.next = inst2; - inst1 = newinst(LPAREN); - inst1->r.subid = *subidp; + inst1 = OpCode(LPAREN); + inst1->r.subid = *SubExprPtr; inst1->l.next = op1->first; PushOperand(inst1, inst2); return; case OR: - op2 = PopOperand('|'); - op1 = PopOperand('|'); - inst2 = newinst(NOP); + op2 = PopOperand(); + op1 = PopOperand(); + inst2 = OpCode(NOP); op2->last->l.next = inst2; op1->last->l.next = inst2; - inst1 = newinst(OR); + inst1 = OpCode(OR); inst1->r.right = op1->first; inst1->l.left = op2->first; PushOperand(inst1, inst2); break; case CAT: - op2 = PopOperand(0); - op1 = PopOperand(0); + op2 = PopOperand(); + op1 = PopOperand(); op1->last->l.next = op2->first; PushOperand(op1->first, op2->last); break; case STAR: - op2 = PopOperand('*'); - inst1 = newinst(OR); + op2 = PopOperand(); + inst1 = OpCode(OR); op2->last->l.next = inst1; inst1->r.right = op2->first; PushOperand(inst1, inst1); break; case PLUS: - op2 = PopOperand('+'); - inst1 = newinst(OR); + op2 = PopOperand(); + inst1 = OpCode(OR); op2->last->l.next = inst1; inst1->r.right = op2->first; PushOperand(op2->first, inst1); break; case QUEST: - op2 = PopOperand('?'); - inst1 = newinst(OR); - inst2 = newinst(NOP); + op2 = PopOperand(); + inst1 = OpCode(OR); + inst2 = OpCode(NOP); inst1->l.left = inst2; inst1->r.right = op2->first; op2->last->l.next = inst2; @@ -354,7 +334,7 @@ static void evaluntil(int pri) { } } -static Reprog* optimize(Reprog *pp) { +static Reprog* Optimize(Reprog *pp) { Reinst *inst, *target; int size; Reprog *npp; @@ -403,7 +383,9 @@ static Reprog* optimize(Reprog *pp) { return npp; } -static Reprog* regcomp1(char *s, int literal, int dot_type) { +/******************************************************************************/ + +static Reprog* Compile(char *s, int literal, int dot_type) { int token; /* get memory for the program */ @@ -414,54 +396,52 @@ static Reprog* regcomp1(char *s, int literal, int dot_type) { } freep = pp->firstinst; classp = pp->class; - errors = 0; /* setup landing pad for fatal errors */ - if(setjmp(regkaboom)) + if (setjmp(regkaboom)) return (free(pp), NULL); /* go compile the sucker */ - lexdone = 0; SrcExpr = s; nclass = 0; nparens = 0; OperatorPtr = OperatorStack; OperandPtr = OperandStack; - subidp = subidstack; - lastwasand = false; - cursubid = 0; + SubExprPtr = SubExprStack; + LastWasOperand = false; + CurrSubExpr = 0; /* Start with a low priority operator to prime parser */ PushOperator(START-1); - while((token = GetToken(literal, dot_type)) != END){ - if((token & 0300) == OPERATOR) - operator(token); + while ((token = GetToken(literal, dot_type)) != END){ + if ((token & 0300) == OPERATOR) + Operator(token); else - operand(token); + Operand(token); } /* Close with a low priority operator */ - evaluntil(START); + EvalUntil(START); /* Force END */ - operand(END); - evaluntil(START); + Operand(END); + EvalUntil(START); if(nparens) - rcerror("unmatched left paren"); + CompileError("unmatched left paren"); --OperandPtr; /* points to first and only operand */ pp->startinst = OperandPtr->first; - pp = optimize(pp); + pp = Optimize(pp); return pp; } Reprog* regcomp(char *s) { - return regcomp1(s, 0, ANY); + return Compile(s, 0, ANY); } Reprog* regcomplit(char *s) { - return regcomp1(s, 1, ANY); + return Compile(s, 1, ANY); } Reprog* regcompnl(char *s) { - return regcomp1(s, 0, ANYNL); + return Compile(s, 0, ANYNL); } diff --git a/regerror.c b/regerror.c index 66a0e7b..28b0840 100644 --- a/regerror.c +++ b/regerror.c @@ -1,16 +1,12 @@ #include #include +#include #include #include "regexp9.h" void regerror(char *s) { - char buf[132]; - - strncpy(buf, "regerror: ", sizeof(buf)); - strncat(buf, s, sizeof(buf)-1); - strncat(buf, "\n", sizeof(buf)-1); - write(2, buf, strlen(buf)); + fprintf(stderr, "regerror: %s\n", s); exit(1); } diff --git a/test b/test deleted file mode 100755 index c6242ef..0000000 Binary files a/test and /dev/null differ diff --git a/test.c b/test.c index 4e697f2..a8706cc 100644 --- a/test.c +++ b/test.c @@ -12,17 +12,15 @@ struct x struct x t[] = { { "^[^!@]+$", "/bin/upas/aliasmail '&'", 0 }, - { "^local!(.*)$", "/mail/box/\\1/mbox", 0 }, - { "^plan9!(.*)$", "\\1", 0 }, - { "^helix!(.*)$", "\\1", 0 }, + { "^local:(.*)$", "/mail/box/\\1/mbox", 0 }, + { "^plan9:(.*)$", "\\1", 0 }, + { "^helix:(.*)$", "\\1", 0 }, { "^([^!]+)@([^!@]+)$", "\\2!\\1", 0 }, { "^(uk\\.[^!]*)(!.*)$", "/bin/upas/uk2uk '\\1' '\\2'", 0 }, { "^[^!]*\\.[^!]*!.*$", "inet!&", 0 }, { "^\xE2\x98\xBA$", "smiley", 0 }, - { "^(coma|research|pipe|pyxis|inet|hunny|gauss)!(.*)$", "/mail/lib/qmail '\\s' 'net!\\1' '\\2'", 0 }, + { "^(coma|research|pipe|pyxis|inet|hunny|gauss):(.*)$", "/mail/lib/qmail '\\s' 'net!\\1' '\\2'", 0 }, { "^.*$", "/mail/lib/qmail '\\s' 'net!research' '&'", 0 }, -// { "^(((((((((((((((((((((a)))))))))))))))))))))$", "/mail/lib/qmail '\\s' 'net!research' '&'", 0 }, - { "^((((((((((((((((((a)))))))))))))))))))))$", "/mail/lib/qmail '\\s' 'net!research' '&'", 0 }, { 0, 0, 0 }, }; diff --git a/test.o b/test.o deleted file mode 100644 index 169325c..0000000 Binary files a/test.o and /dev/null differ