git.mdlowis.com Git - proto/libregexp.git/commitdiff
minor refactoring and renaming of regex compilation process
author Michael D. Lowis <mike.lowis@gentex.com>
Tue, 18 Dec 2018 18:59:12 +0000 (13:59 -0500)
committer Michael D. Lowis <mike.lowis@gentex.com>
Tue, 18 Dec 2018 18:59:12 +0000 (13:59 -0500)
.gitignore
regcomp.c

index 6e92f57d4647a41113f50e94f59047114f7e1d81..25b12e208dcf8143be3273752f79373457fa85e5 100644 (file)
@@ -1 +1,3 @@
 tags
+*.o
+*.a
index 4adc7efeafdf87b6c1920a65df8d6603f12e536f..cce3a1251f49bdd81b52bbbbf6306fa1eae17129 100644 (file)
--- a/regcomp.c
+++ b/regcomp.c
@@ -21,14 +21,14 @@ static Reprog    *reprog;
 
 static char* SrcExpr; /* pointer to next character in source expression */
 
-#define    NSTACK    20
-static    Node   andstack[NSTACK];  /* Stack of operands */
-static    Node*  andp;              /* Pointer to the top of the operand stack */
-static    int    atorstack[NSTACK]; /* Stack of operators */
-static    int*   atorp;             /* Pointer to the top of the operator stack */
+#define NSTACK 20
+static    Node   OperandStack[NSTACK];  /* Stack of operands */
+static    Node*  OperandPtr;              /* Pointer to the top of the operand stack */
+static    int    OperatorStack[NSTACK]; /* Stack of operators */
+static    int*   OperatorPtr;             /* Pointer to the top of the operator stack */
 
 static    int    cursubid;        /* id of current subexpression */
-static    int    subidstack[NSTACK];    /* parallel to atorstack */
+static    int    subidstack[NSTACK];    /* parallel to OperatorStack */
 static    int*    subidp;
 static    bool    lastwasand;    /* Last token was operand */
 static    int    nparens;
@@ -41,17 +41,29 @@ static    int    yyrune;        /* last lex'd rune */
 static    Reclass* yyclassp;    /* last lex'd class */
 
 /* predeclared crap */
-static    void    operator(int);
-static    void    pushand(Reinst*, Reinst*);
-static    void    pushator(int);
-static    void    evaluntil(int);
-static    int    bldcclass(void);
+static void PushOperand(Reinst*, Reinst*);
+static void PushOperator(int);
+static Node* PopOperand(int op);
+static int PopOperator(void);
+static int BuildClass(void);
+
+static void operator(int);
+static void evaluntil(int);
 
 static jmp_buf regkaboom;
 
 /******************************************************************************/
 
-static int nextc(int* p_rune) {
+static Reinst* newinst(int t) { /* Fill the instruction slot at freep with type t and return a pointer to it. */
+    freep->type = t;
+    freep->l.left = 0; /* l.left and r.right are both zeroed before the slot is handed out */
+    freep->r.right = 0;
+    return freep++; /* returns the slot just filled, then advances freep; NOTE(review): no bound check on freep is visible here - confirm the buffer is sized by the caller */
+}
+
+/******************************************************************************/
+
+static int NextRune(int* p_rune) {
     if (lexdone) {
         *p_rune = 0;
         return 1;
@@ -65,8 +77,8 @@ static int nextc(int* p_rune) {
     return 0;
 }
 
-static int lex(int literal, int dot_type) {
-    int quoted = nextc(&yyrune);
+static int GetToken(int literal, int dot_type) {
+    int quoted = NextRune(&yyrune);
     if (literal || quoted) {
         if (yyrune == 0)
             return END;
@@ -84,7 +96,7 @@ static int lex(int literal, int dot_type) {
         case ')': return RPAREN;
         case '^': return BOL;
         case '$': return EOL;
-        case '[': return bldcclass();
+        case '[': return BuildClass();
     }
     return RUNE;
 }
@@ -116,13 +128,135 @@ static void cant(char *s) {
 
 /******************************************************************************/
 
-static Reinst* newinst(int t) {
-    freep->type = t;
-    freep->l.left = 0;
-    freep->r.right = 0;
-    return freep++;
+static void PushOperand(Reinst *f, Reinst *l) { /* Push an entry with first = f and last = l onto OperandStack. */
+    if(OperandPtr >= &OperandStack[NSTACK]) /* OperandPtr addresses the next free slot; at the end of the array the stack is full */
+        cant("operand stack overflow"); /* presumably cant() does not return, otherwise the stores below would land past the array - confirm */
+    OperandPtr->first = f;
+    OperandPtr->last = l;
+    OperandPtr++;
+}
+
+static void PushOperator(int t) { /* Push operator t onto OperatorStack and store cursubid through subidp alongside it. */
+    if(OperatorPtr >= &OperatorStack[NSTACK]) /* OperatorPtr addresses the next free slot; at the end of the array the stack is full */
+        cant("operator stack overflow");
+    *OperatorPtr++ = t;
+    *subidp++ = cursubid; /* subidp has no range check of its own; it is only advanced here, together with OperatorPtr */
+}
+
+static Node* PopOperand(int op) { /* Pop OperandStack and return a pointer to the popped entry; op is passed to regerr2 when the stack is empty. */
+    Reinst *inst;
+
+    if(OperandPtr <= &OperandStack[0]){ /* stack is empty: there is no operand to pop */
+        regerr2("missing operand for ", op);
+        inst = newinst(NOP); /* reached only if regerr2 returns: push a NOP entry so the decrement below stays inside the array */
+        PushOperand(inst,inst);
+    }
+    return --OperandPtr; /* the result points into OperandStack at the slot the next PushOperand will overwrite */
+}
+
+static int PopOperator(void) { /* Pop and return the top of OperatorStack, stepping subidp back by one as well. */
+    if(OperatorPtr <= &OperatorStack[0]) /* nothing has been pushed: the stack is empty */
+        cant("operator stack underflow");
+    --subidp; /* subidp now addresses the cursubid value stored when this operator was pushed */
+    return *--OperatorPtr;
+}
+
+/******************************************************************************/
+
+static int BuildClass(void) { /* Read a bracketed class via NextRune and store its sorted, merged spans in a new Reclass (yyclassp); returns CCLASS or NCCLASS, or 0 on the error paths that follow rcerror. */
+    int type;
+    int r[NCCRUNE]; /* scratch list of (start, end) rune pairs, two ints per span */
+    int *p, *ep, *np; /* ep marks the end of the used part of r */
+    int rune;
+    int quoted;
+
+    /* we have already seen the '[' */
+    if (nclass >= nelem(reprog->class))
+        rcerror("too many character classes; increase Reprog.class size"); /* presumably rcerror does not return here; otherwise the index below would exceed the checked limit - confirm */
+    type = CCLASS;
+    yyclassp = &(classp[nclass++]); /* claim the next unused entry of classp */
+
+    /* look ahead for negation */
+    /* SPECIAL CASE!!! negated classes don't match \n */
+    ep = r;
+    quoted = NextRune(&rune);
+    if(!quoted && rune == '^'){
+        type = NCCLASS;
+        quoted = NextRune(&rune); /* move past the '^' to the first rune of the class body */
+        *ep++ = '\n'; /* seed the list with the pair ('\n', '\n') */
+        *ep++ = '\n';
+    }
+
+    /* parse class into a set of spans */
+    while(ep < &r[NCCRUNE-1]){ /* true while at least two ints remain free in r */
+        if(rune == 0){ /* NextRune produced 0 before an unquoted ']' was seen */
+            rcerror("malformed '[]'");
+            return 0;
+        }
+        if(!quoted && rune == ']')
+            break;
+        if(!quoted && rune == '-'){
+            if(ep == r){ /* no pair stored yet, so there is nothing for the '-' to extend; NOTE(review): after '^' the '\n' pair makes ep != r, so a leading '-' passes this check and rewrites that pair's end - confirm intended */
+                rcerror("malformed '[]'");
+                return 0;
+            }
+            quoted = NextRune(&rune);
+            if((!quoted && rune == ']') || rune == 0){ /* '-' must be followed by a rune to end the range */
+                rcerror("malformed '[]'");
+                return 0;
+            }
+            *(ep-1) = rune; /* overwrite the end of the most recently stored pair */
+        } else {
+            *ep++ = rune; /* a single rune is stored as the pair (rune, rune) */
+            *ep++ = rune;
+        }
+        quoted = NextRune(&rune);
+    }
+    if(ep >= &r[NCCRUNE-1]) { /* the loop ended because r filled up, not because of the break on ']' */
+        rcerror("char class too large; increase Reclass.spans size");
+        return 0;
+    }
+
+    /* sort on span start */
+    for(p = r; p < ep; p += 2){ /* after each inner loop the pair at p has the smallest start among the pairs from p onward */
+        for(np = p; np < ep; np += 2)
+            if(*np < *p){
+                rune = np[0]; /* rune is reused as the swap temporary; both ints of the pair are exchanged */
+                np[0] = p[0];
+                p[0] = rune;
+                rune = np[1];
+                np[1] = p[1];
+                p[1] = rune;
+            }
+    }
+
+    /* merge spans */
+    np = yyclassp->spans; /* from here on np addresses the output pair currently being built */
+    p = r;
+    if(r == ep)
+        yyclassp->end = np; /* no pairs were collected: end equals the start of spans */
+    else {
+        np[0] = *p++; /* the first sorted pair opens the output */
+        np[1] = *p++;
+        for(; p < ep; p += 2)
+            /* overlapping or adjacent ranges? */
+            if(p[0] <= np[1] + 1){
+                if(p[1] >= np[1])
+                    np[1] = p[1];    /* coalesce */
+            } else {
+                np += 2; /* gap before this pair: start a new output pair */
+                np[0] = p[0];
+                np[1] = p[1];
+            }
+        yyclassp->end = np+2; /* end is set just past the last output pair */
+    }
+
+    return type;
 }
 
+
+/******************************************************************************/
+
 static void operand(int t) {
     Reinst *i;
 
@@ -135,7 +269,7 @@ static void operand(int t) {
     if(t == RUNE)
         i->r.r = yyrune;
 
-    pushand(i, i);
+    PushOperand(i, i);
     lastwasand = true;
 }
 
@@ -151,103 +285,70 @@ static void operator(int t) {
     }else
         evaluntil(t);
     if(t != RPAREN)
-        pushator(t);
+        PushOperator(t);
     lastwasand = false;
     if(t==STAR || t==QUEST || t==PLUS || t==RPAREN)
         lastwasand = true;    /* these look like operands */
 }
 
-static void pushand(Reinst *f, Reinst *l) {
-    if(andp >= &andstack[NSTACK])
-        cant("operand stack overflow");
-    andp->first = f;
-    andp->last = l;
-    andp++;
-}
-
-static void pushator(int t) {
-    if(atorp >= &atorstack[NSTACK])
-        cant("operator stack overflow");
-    *atorp++ = t;
-    *subidp++ = cursubid;
-}
-
-static Node* popand(int op) {
-    Reinst *inst;
-
-    if(andp <= &andstack[0]){
-        regerr2("missing operand for ", op);
-        inst = newinst(NOP);
-        pushand(inst,inst);
-    }
-    return --andp;
-}
-
-static int popator(void) {
-    if(atorp <= &atorstack[0])
-        cant("operator stack underflow");
-    --subidp;
-    return *--atorp;
-}
-
 static void evaluntil(int pri) {
     Node *op1, *op2;
     Reinst *inst1, *inst2;
 
-    while(pri==RPAREN || atorp[-1]>=pri){
-        switch(popator()){
+    while(pri==RPAREN || OperatorPtr[-1]>=pri){
+        switch(PopOperator()){
         default:
             rcerror("unknown operator in evaluntil");
             break;
         case LPAREN:        /* must have been RPAREN */
-            op1 = popand('(');
+            op1 = PopOperand('(');
             inst2 = newinst(RPAREN);
             inst2->r.subid = *subidp;
             op1->last->l.next = inst2;
             inst1 = newinst(LPAREN);
             inst1->r.subid = *subidp;
             inst1->l.next = op1->first;
-            pushand(inst1, inst2);
+            PushOperand(inst1, inst2);
             return;
         case OR:
-            op2 = popand('|');
-            op1 = popand('|');
+            op2 = PopOperand('|');
+            op1 = PopOperand('|');
             inst2 = newinst(NOP);
             op2->last->l.next = inst2;
             op1->last->l.next = inst2;
             inst1 = newinst(OR);
             inst1->r.right = op1->first;
             inst1->l.left = op2->first;
-            pushand(inst1, inst2);
+            PushOperand(inst1, inst2);
             break;
         case CAT:
-            op2 = popand(0);
-            op1 = popand(0);
+            op2 = PopOperand(0);
+            op1 = PopOperand(0);
             op1->last->l.next = op2->first;
-            pushand(op1->first, op2->last);
+            PushOperand(op1->first, op2->last);
             break;
         case STAR:
-            op2 = popand('*');
+            op2 = PopOperand('*');
             inst1 = newinst(OR);
             op2->last->l.next = inst1;
             inst1->r.right = op2->first;
-            pushand(inst1, inst1);
+            PushOperand(inst1, inst1);
             break;
         case PLUS:
-            op2 = popand('+');
+            op2 = PopOperand('+');
             inst1 = newinst(OR);
             op2->last->l.next = inst1;
             inst1->r.right = op2->first;
-            pushand(op2->first, inst1);
+            PushOperand(op2->first, inst1);
             break;
         case QUEST:
-            op2 = popand('?');
+            op2 = PopOperand('?');
             inst1 = newinst(OR);
             inst2 = newinst(NOP);
             inst1->l.left = inst2;
             inst1->r.right = op2->first;
             op2->last->l.next = inst2;
-            pushand(inst1, inst2);
+            PushOperand(inst1, inst2);
             break;
         }
     }
@@ -302,101 +403,6 @@ static Reprog* optimize(Reprog *pp) {
     return npp;
 }
 
-static Reclass* newclass(void) {
-    if(nclass >= nelem(reprog->class))
-        rcerror("too many character classes; increase Reprog.class size");
-    return &(classp[nclass++]);
-}
-
-static int bldcclass(void) {
-    int type;
-    int r[NCCRUNE];
-    int *p, *ep, *np;
-    int rune;
-    int quoted;
-
-    /* we have already seen the '[' */
-    type = CCLASS;
-    yyclassp = newclass();
-
-    /* look ahead for negation */
-    /* SPECIAL CASE!!! negated classes don't match \n */
-    ep = r;
-    quoted = nextc(&rune);
-    if(!quoted && rune == '^'){
-        type = NCCLASS;
-        quoted = nextc(&rune);
-        *ep++ = '\n';
-        *ep++ = '\n';
-    }
-
-    /* parse class into a set of spans */
-    while(ep < &r[NCCRUNE-1]){
-        if(rune == 0){
-            rcerror("malformed '[]'");
-            return 0;
-        }
-        if(!quoted && rune == ']')
-            break;
-        if(!quoted && rune == '-'){
-            if(ep == r){
-                rcerror("malformed '[]'");
-                return 0;
-            }
-            quoted = nextc(&rune);
-            if((!quoted && rune == ']') || rune == 0){
-                rcerror("malformed '[]'");
-                return 0;
-            }
-            *(ep-1) = rune;
-        } else {
-            *ep++ = rune;
-            *ep++ = rune;
-        }
-        quoted = nextc(&rune);
-    }
-    if(ep >= &r[NCCRUNE-1]) {
-        rcerror("char class too large; increase Reclass.spans size");
-        return 0;
-    }
-
-    /* sort on span start */
-    for(p = r; p < ep; p += 2){
-        for(np = p; np < ep; np += 2)
-            if(*np < *p){
-                rune = np[0];
-                np[0] = p[0];
-                p[0] = rune;
-                rune = np[1];
-                np[1] = p[1];
-                p[1] = rune;
-            }
-    }
-
-    /* merge spans */
-    np = yyclassp->spans;
-    p = r;
-    if(r == ep)
-        yyclassp->end = np;
-    else {
-        np[0] = *p++;
-        np[1] = *p++;
-        for(; p < ep; p += 2)
-            /* overlapping or adjacent ranges? */
-            if(p[0] <= np[1] + 1){
-                if(p[1] >= np[1])
-                    np[1] = p[1];    /* coalesce */
-            } else {
-                np += 2;
-                np[0] = p[0];
-                np[1] = p[1];
-            }
-        yyclassp->end = np+2;
-    }
-
-    return type;
-}
-
 static Reprog* regcomp1(char *s, int literal, int dot_type) {
     int token;
 
@@ -419,16 +425,16 @@ static Reprog* regcomp1(char *s, int literal, int dot_type) {
     SrcExpr = s;
     nclass = 0;
     nparens = 0;
-    atorp = atorstack;
-    andp = andstack;
+    OperatorPtr = OperatorStack;
+    OperandPtr = OperandStack;
     subidp = subidstack;
     lastwasand = false;
     cursubid = 0;
 
     /* Start with a low priority operator to prime parser */
-    pushator(START-1);
-    while((token = lex(literal, dot_type)) != END){
-        if((token&0300) == OPERATOR)
+    PushOperator(START-1);
+    while((token = GetToken(literal, dot_type)) != END){
+        if((token & 0300) == OPERATOR)
             operator(token);
         else
             operand(token);
@@ -442,8 +448,8 @@ static Reprog* regcomp1(char *s, int literal, int dot_type) {
     evaluntil(START);
     if(nparens)
         rcerror("unmatched left paren");
-    --andp;    /* points to first and only operand */
-    pp->startinst = andp->first;
+    --OperandPtr;    /* points to first and only operand */
+    pp->startinst = OperandPtr->first;
     pp = optimize(pp);
     return pp;
 }