From 64df20b10a5623eed098f060f875fe28ede36dc9 Mon Sep 17 00:00:00 2001 From: "Michael D. Lowis" Date: Tue, 18 Dec 2018 23:43:16 -0500 Subject: [PATCH] yet even more minor refactoring --- regcomp.c | 69 +++++++++++++++------------------- regcomp.h | 108 ++++++++++++++++++++++++++++++------------------------ regexp9.h | 86 +++++++++++++++++++------------------------ 3 files changed, 128 insertions(+), 135 deletions(-) diff --git a/regcomp.c b/regcomp.c index 96bdc71..077ef00 100644 --- a/regcomp.c +++ b/regcomp.c @@ -13,9 +13,6 @@ typedef struct Node { Reinst* last; } Node; -/* max character classes per program is nelem(reprog->class) */ -static Reprog *reprog; - /* max rune ranges per character class is nelem(classp->spans)/2 */ #define NCCRUNE nelem(classp->spans) @@ -30,14 +27,15 @@ static int SubExprStack[NSTACK]; /* parallel to OperatorStack */ static int* SubExprPtr; /* Pointer to the top of the sub-expression stack */ static int CurrSubExpr; /* id of current subexpression */ -static bool LastWasOperand; /* Last token was operand */ +static bool LastWasOperand; /* Last token was operand */ +static jmp_buf LandingPad; +static int NParens; +static int NClass; -static int nparens; -static unsigned int nclass; -static Reclass* classp; -static Reinst* freep; -static int yyrune; /* last lex'd rune */ -static Reclass* yyclassp; /* last lex'd class */ +static Reclass* classp; +static Reinst* freep; +static int yyrune; /* last lex'd rune */ +static Reclass* yyclassp; /* last lex'd class */ /* predeclared crap */ static Reinst* OpCode(int t); @@ -54,8 +52,6 @@ static void EvalUntil(int); static Reprog* Optimize(Reprog *pp); static Reprog* Compile(char *s, int literal, int dot_type); -static jmp_buf regkaboom; - /******************************************************************************/ static Reinst* OpCode(int t) { @@ -108,11 +104,9 @@ static int GetToken(int literal, int dot_type) { static void CompileError(char *s) { regerror(s); - longjmp(regkaboom, 1); + 
longjmp(LandingPad, 1); } -/******************************************************************************/ - static void PushOperand(Reinst *f, Reinst *l) { if (OperandPtr >= &OperandStack[NSTACK]) CompileError("operand stack overflow"); @@ -144,23 +138,22 @@ static int PopOperator(void) { /******************************************************************************/ static int BuildClass(void) { - int type; int r[NCCRUNE]; int *p, *ep, *np; int rune; int quoted; /* we have already seen the '[' */ - if (nclass >= nelem(reprog->class)) + if (NClass >= NCLASSES) CompileError("too many character classes; increase Reprog.class size"); - type = CCLASS; - yyclassp = &(classp[nclass++]); + int type = CCLASS; + yyclassp = &(classp[NClass++]); /* look ahead for negation */ /* SPECIAL CASE!!! negated classes don't match \n */ ep = r; quoted = NextRune(&rune); - if(!quoted && rune == '^'){ + if (!quoted && rune == '^') { type = NCCLASS; quoted = NextRune(&rune); *ep++ = '\n'; @@ -168,20 +161,20 @@ static int BuildClass(void) { } /* parse class into a set of spans */ - while(ep < &r[NCCRUNE-1]){ - if(rune == 0){ + while (ep < &r[NCCRUNE-1]) { + if (rune == 0) { CompileError("malformed '[]'"); return 0; } - if(!quoted && rune == ']') + if (!quoted && rune == ']') break; - if(!quoted && rune == '-'){ - if(ep == r){ + if (!quoted && rune == '-') { + if (ep == r) { CompileError("malformed '[]'"); return 0; } quoted = NextRune(&rune); - if((!quoted && rune == ']') || rune == 0){ + if ((!quoted && rune == ']') || rune == 0) { CompileError("malformed '[]'"); return 0; } @@ -192,15 +185,15 @@ static int BuildClass(void) { } quoted = NextRune(&rune); } - if(ep >= &r[NCCRUNE-1]) { + if (ep >= &r[NCCRUNE-1]) { CompileError("char class too large; increase Reclass.spans size"); return 0; } /* sort on span start */ - for(p = r; p < ep; p += 2){ - for(np = p; np < ep; np += 2) - if(*np < *p){ + for (p = r; p < ep; p += 2) { + for (np = p; np < ep; np += 2) + if (*np < *p) { rune = np[0]; 
np[0] = p[0]; p[0] = rune; @@ -254,12 +247,12 @@ static void Operand(int t) { } static void Operator(int t) { - if(t==RPAREN && --nparens<0) + if(t==RPAREN && --NParens<0) CompileError("unmatched right paren"); if(t==LPAREN){ if(++CurrSubExpr >= NSUBEXP) CompileError("too many subexpressions"); - nparens++; + NParens++; if(LastWasOperand) Operator(CAT); }else @@ -341,9 +334,7 @@ static Reprog* Optimize(Reprog *pp) { Reclass *cl; int diff; - /* - * get rid of NOOP chains - */ + /* get rid of NOOP chains */ for(inst = pp->firstinst; inst->type != END; inst++){ target = inst->l.next; while(target->type == NOP) @@ -398,13 +389,13 @@ static Reprog* Compile(char *s, int literal, int dot_type) { classp = pp->class; /* setup landing pad for fatal errors */ - if (setjmp(regkaboom)) + if (setjmp(LandingPad)) return (free(pp), NULL); /* go compile the sucker */ SrcExpr = s; - nclass = 0; - nparens = 0; + NClass = 0; + NParens = 0; OperatorPtr = OperatorStack; OperandPtr = OperandStack; SubExprPtr = SubExprStack; @@ -426,7 +417,7 @@ static Reprog* Compile(char *s, int literal, int dot_type) { /* Force END */ Operand(END); EvalUntil(START); - if(nparens) + if (NParens) CompileError("unmatched left paren"); --OperandPtr; /* points to first and only operand */ pp->startinst = OperandPtr->first; diff --git a/regcomp.h b/regcomp.h index c21d21e..88bda59 100644 --- a/regcomp.h +++ b/regcomp.h @@ -4,11 +4,10 @@ #define nelem(x) (sizeof(x)/sizeof((x)[0])) #define NSUBEXP 32 -typedef struct Resublist Resublist; -struct Resublist -{ + +typedef struct { Resub m[NSUBEXP]; -}; +} Resublist; /* * Actions and Tokens (Reinst types) @@ -16,50 +15,65 @@ struct Resublist * 02xx are operators, value == precedence * 03xx are tokens, i.e. 
operands for operators */ -#define RUNE 0177 -#define OPERATOR 0200 /* Bitmask of all operators */ -#define START 0200 /* Start, used for marker on stack */ -#define RPAREN 0201 /* Right bracket, ) */ -#define LPAREN 0202 /* Left bracket, ( */ -#define OR 0203 /* Alternation, | */ -#define CAT 0204 /* Concatentation, implicit operator */ -#define STAR 0205 /* Closure, * */ -#define PLUS 0206 /* a+ == aa* */ -#define QUEST 0207 /* a? == a|nothing, i.e. 0 or 1 a's */ -#define ANY 0300 /* Any character except newline, . */ -#define ANYNL 0301 /* Any character including newline, . */ -#define NOP 0302 /* No operation, internal use only */ -#define BOL 0303 /* Beginning of line, ^ */ -#define EOL 0304 /* End of line, $ */ -#define CCLASS 0305 /* Character class, [] */ -#define NCCLASS 0306 /* Negated character class, [] */ -#define END 0377 /* Terminate: match found */ +enum { + RUNE = 0177, + OPERATOR = 0200, /* Bitmask of all operators */ + START = 0200, /* Start, used for marker on stack */ + RPAREN = 0201, /* Right bracket, ) */ + LPAREN = 0202, /* Left bracket, ( */ + OR = 0203, /* Alternation, | */ + CAT = 0204, /* Concatenation, implicit operator */ + STAR = 0205, /* Closure, * */ + PLUS = 0206, /* a+ == aa* */ + QUEST = 0207, /* a? == a|nothing, i.e. 0 or 1 a's */ + ANY = 0300, /* Any character except newline, . */ + ANYNL = 0301, /* Any character including newline, . 
*/ + NOP = 0302, /* No operation, internal use only */ + BOL = 0303, /* Beginning of line, ^ */ + EOL = 0304, /* End of line, $ */ + CCLASS = 0305, /* Character class, [] */ + NCCLASS = 0306, /* Negated character class, [] */ + END = 0377, /* Terminate: match found */ -/* - * regexec execution lists - */ +// END = -1, /* Terminate: match found */ +// START = -2, /* Start, used for marker on stack */ +// RPAREN = -3, /* Right bracket, ) */ +// LPAREN = -4, /* Left bracket, ( */ +// OR = -5, /* Alternation, | */ +// CAT = -6, /* Concatentation, implicit operator */ +// STAR = -7, /* Closure, * */ +// PLUS = -8, /* a+ == aa* */ +// QUEST = -9, /* a? == a|nothing, i.e. 0 or 1 a's */ +// ANY = -10, /* Any character except newline, . */ +// ANYNL = -11, /* Any character including newline, . */ +// NOP = -12, /* No operation, internal use only */ +// BOL = -13, /* Beginning of line, ^ */ +// EOL = -14, /* End of line, $ */ +// CCLASS = -15, /* Character class, [] */ +// NCCLASS = -16, /* Negated character class, [] */ +}; + +/* regexec execution lists */ #define LISTSIZE 10 #define BIGLISTSIZE (25*LISTSIZE) -typedef struct Relist Relist; -struct Relist -{ - Reinst* inst; /* Reinstruction of the thread */ - Resublist se; /* matched subexpressions in this thread */ -}; -typedef struct Reljunk Reljunk; -struct Reljunk -{ - Relist* relist[2]; - Relist* reliste[2]; - int starttype; - int startchar; - char* starts; - char* eol; - int* rstarts; - int* reol; -}; -extern Relist* _renewthread(Relist*, Reinst*, int, Resublist*); -extern void _renewmatch(Resub*, int, Resublist*); -extern Relist* _renewemptythread(Relist*, Reinst*, int, char*); -extern Relist* _rrenewemptythread(Relist*, Reinst*, int, int*); +typedef struct { + Reinst* inst; /* Reinstruction of the thread */ + Resublist se; /* matched subexpressions in this thread */ +} Relist; + +typedef struct { + Relist* relist[2]; + Relist* reliste[2]; + int starttype; + int startchar; + char* starts; + char* eol; + int* rstarts; 
+ int* reol; +} Reljunk; + +extern Relist* _renewthread(Relist*, Reinst*, int, Resublist*); +extern void _renewmatch(Resub*, int, Resublist*); +extern Relist* _renewemptythread(Relist*, Reinst*, int, char*); +extern Relist* _rrenewemptythread(Relist*, Reinst*, int, int*); diff --git a/regexp9.h b/regexp9.h index 9b52e66..375fa53 100644 --- a/regexp9.h +++ b/regexp9.h @@ -3,67 +3,55 @@ #include "utf.h" -typedef struct Reinst Reinst; - -/* - * Sub expression matches - */ -typedef struct Resub { - union - { - char *sp; - int *rsp; - }s; - union - { - char *ep; - int *rep; - }e; +/* Sub expression matches */ +typedef struct { + union { + char* sp; + int* rsp; + } s; + union { + char* ep; + int* rep; + } e; } Resub; -/* - * character class, each pair of rune's defines a range - */ -typedef struct Reclass { - int *end; - int spans[64]; +/* character class, each pair of runes defines a range */ +typedef struct { + int* end; + int spans[64]; } Reclass; -/* - * Machine instructions - */ +/* Machine instructions */ +typedef struct Reinst Reinst; struct Reinst { - int type; + int type; union { - Reclass *cp; /* class pointer */ - int r; /* character */ - int subid; /* sub-expression id for RBRA and LBRA */ - Reinst *right; /* right child of OR */ + Reclass* cp; /* class pointer */ + int r; /* character */ + int subid; /* sub-expression id for RBRA and LBRA */ + Reinst* right; /* right child of OR */ }r; - union { /* regexp relies on these two being in the same union */ - Reinst *left; /* left child of OR */ - Reinst *next; /* next instruction for CAT & LBRA */ + union { /* regexp relies on these two being in the same union */ + Reinst* left; /* left child of OR */ + Reinst* next; /* next instruction for CAT & LBRA */ }l; }; -/* - * Reprogram definition - */ -typedef struct Reprog { - Reinst *startinst; /* start pc */ - Reclass class[16]; /* .data */ - Reinst firstinst[5]; /* .text */ -} Reprog; +#define NCLASSES 16 -extern Reprog *regcomp9(char*); -extern Reprog 
*regcomplit9(char*); -extern Reprog *regcompnl9(char*); -extern void regerror9(char*); -extern int regexec9(Reprog*, char*, Resub*, int); -extern void regsub9(char*, char*, int, Resub*, int); +/* Reprogram definition */ +typedef struct { + Reinst* startinst; /* start pc */ + Reclass class[NCLASSES]; /* .data */ + Reinst firstinst[5]; /* .text */ +} Reprog; -extern int rregexec9(Reprog*, int*, Resub*, int); -extern void rregsub9(int*, int*, int, Resub*, int); +extern Reprog* regcomp9(char*); +extern Reprog* regcomplit9(char*); +extern Reprog* regcompnl9(char*); +extern void regerror9(char*); +extern int regexec9(Reprog*, char*, Resub*, int); +extern void regsub9(char*, char*, int, Resub*, int); /* * Darwin simply cannot handle having routines that -- 2.49.0