#include <setjmp.h>
#include <string.h>
#include <stdlib.h>
+#include <stdbool.h>
+
#include "regexp9.h"
#include "regcomp.h"
-#define TRUE 1
-#define FALSE 0
-
-/*
- * Parser Information
- */
-typedef
-struct Node
-{
- Reinst* first;
- Reinst* last;
-}Node;
+/* Parser Information */
+/* One entry of the operand stack: a fragment of compiled instructions. */
+typedef struct Node {
+ Reinst* first; /* instruction the fragment is entered at */
+ Reinst* last; /* instruction whose l.next is linked to whatever follows */
+} Node;
/* max character classes per program is nelem(reprog->class) */
static Reprog *reprog;
/* max rune ranges per character class is nelem(classp->spans)/2 */
#define NCCRUNE nelem(classp->spans)
+static char* SrcExpr; /* pointer to next character in source expression */
+
#define NSTACK 20
-static Node andstack[NSTACK];
-static Node *andp;
-static int atorstack[NSTACK];
-static int* atorp;
+static Node andstack[NSTACK]; /* Stack of operands */
+static Node* andp; /* Pointer to the top of the operand stack */
+static int atorstack[NSTACK]; /* Stack of operators */
+static int* atorp; /* Pointer to the top of the operator stack */
+
static int cursubid; /* id of current subexpression */
static int subidstack[NSTACK]; /* parallel to atorstack */
static int* subidp;
-static int lastwasand; /* Last token was operand */
-static int nbra;
-static char* exprp; /* pointer to next character in source expression */
+static bool lastwasand; /* Last token was operand */
+static int nparens; /* '(' seen so far that have not been closed by ')' */
static int lexdone;
static unsigned int nclass;
static Reclass*classp;
static Reinst* freep;
static int errors;
-static Rune yyrune; /* last lex'd rune */
-static Reclass*yyclassp; /* last lex'd class */
+static int yyrune; /* last lex'd rune */
+static Reclass* yyclassp; /* last lex'd class */
/* predeclared crap */
static void operator(int);
static jmp_buf regkaboom;
-static void
-rcerror(char *s)
-{
+/******************************************************************************/
+
+/*
+ * Fetch the next rune of the source expression into *p_rune.
+ *
+ * Returns 1 when the rune was escaped with a backslash, or when the
+ * expression is already exhausted (in which case *p_rune is 0);
+ * returns 0 for an ordinary, unescaped rune.
+ */
+static int nextc(int* p_rune) {
+    if (lexdone) {
+        *p_rune = 0;
+        return 1;
+    }
+    SrcExpr += chartorune(p_rune, SrcExpr);
+    if (*p_rune == '\\') {
+        SrcExpr += chartorune(p_rune, SrcExpr);
+        /*
+         * A backslash directly before the terminator: SrcExpr has now
+         * been stepped over the end of the string, so latch lexdone to
+         * keep any later call from reading beyond the expression.
+         */
+        if (*p_rune == 0)
+            lexdone = 1;
+        return 1;
+    }
+    lexdone = (*p_rune == 0);
+    return 0;
+}
+
+/*
+ * Read one rune into yyrune and classify it as a token.
+ *
+ * literal  - non-zero: every rune is an ordinary RUNE (no operators).
+ * dot_type - token to hand back for an unescaped '.'.
+ *
+ * An escaped rune is always a RUNE; a zero rune is always END.
+ */
+static int lex(int literal, int dot_type) {
+    /* nextc() must run first so yyrune is filled in either mode */
+    int escaped = nextc(&yyrune);
+    int plain = (escaped || literal);
+
+    if (plain)
+        return (yyrune == 0) ? END : RUNE;
+
+    switch (yyrune) {
+    case 0:
+        return END;
+    case '(':
+        return LPAREN;
+    case ')':
+        return RPAREN;
+    case '|':
+        return OR;
+    case '*':
+        return STAR;
+    case '+':
+        return PLUS;
+    case '?':
+        return QUEST;
+    case '.':
+        return dot_type;
+    case '^':
+        return BOL;
+    case '$':
+        return EOL;
+    case '[':
+        return bldcclass();
+    default:
+        return RUNE;
+    }
+}
+
+/******************************************************************************/
+
+/*
+ * Count a compile error, report s through regerror(), and longjmp to
+ * regkaboom. Control does not come back to the caller.
+ */
+static void rcerror(char *s) {
errors++;
regerror(s);
longjmp(regkaboom, 1);
}
-static Reinst*
-newinst(int t)
-{
+/*
+ * Report the error message s with the single character c appended.
+ * The message is truncated if needed so that s, c and the terminating
+ * NUL always fit in the local buffer. Does not return (see rcerror).
+ */
+static void regerr2(char *s, int c) {
+    char buf[100];
+    char *cp = buf;
+    /* stop early enough to keep two bytes free: one for c, one for NUL */
+    char *limit = buf + sizeof(buf) - 2;
+
+    while (*s && cp < limit)
+        *cp++ = *s++;
+    *cp++ = c;
+    *cp = '\0';
+    rcerror(buf);
+}
+
+/*
+ * Report an internal "can't happen" condition: prefixes s with
+ * "can't happen: " and passes it to rcerror(), which does not return.
+ * s is truncated if the combined message would not fit in the buffer.
+ */
+static void cant(char *s) {
+    char buf[100] = "can't happen: ";
+
+    /*
+     * strncat's count is the number of bytes it may append, not the size
+     * of the destination, so subtract what is already in buf and leave
+     * one byte for the NUL it always writes.
+     */
+    strncat(buf, s, sizeof(buf) - strlen(buf) - 1);
+    rcerror(buf);
+}
+
+/******************************************************************************/
+
+/*
+ * Claim the instruction slot at freep: set its type to t, clear both
+ * link fields, and advance freep. Returns the claimed slot.
+ * No bounds check is done here.
+ */
+static Reinst* newinst(int t) {
freep->type = t;
freep->l.left = 0;
freep->r.right = 0;
return freep++;
}
-static void
-operand(int t)
-{
+static void operand(int t) {
Reinst *i;
if(lastwasand)
i->r.r = yyrune;
pushand(i, i);
- lastwasand = TRUE;
+ lastwasand = true;
}
-static void
-operator(int t)
-{
- if(t==RBRA && --nbra<0)
+/*
+ * Handle operator token t.
+ * LPAREN opens a new subexpression (inserting an implicit CAT when it
+ * follows an operand); any other operator first reduces the stacks via
+ * evaluntil(t). Everything except RPAREN is then pushed. Postfix
+ * operators and RPAREN leave lastwasand set.
+ */
+static void operator(int t) {
+ if(t==RPAREN && --nparens<0)
rcerror("unmatched right paren");
- if(t==LBRA){
+ if(t==LPAREN){
if(++cursubid >= NSUBEXP)
rcerror("too many subexpressions");
- nbra++;
+ nparens++;
if(lastwasand)
operator(CAT);
}else
evaluntil(t);
- if(t != RBRA)
+ if(t != RPAREN)
pushator(t);
- lastwasand = FALSE;
- if(t==STAR || t==QUEST || t==PLUS || t==RBRA)
- lastwasand = TRUE; /* these look like operands */
+ lastwasand = false;
+ if(t==STAR || t==QUEST || t==PLUS || t==RPAREN)
+ lastwasand = true; /* these look like operands */
}
-static void
-regerr2(char *s, int c)
-{
- char buf[100];
- char *cp = buf;
- while(*s)
- *cp++ = *s++;
- *cp++ = c;
- *cp = '\0';
- rcerror(buf);
-}
-
-static void
-cant(char *s)
-{
- char buf[100];
- strncpy(buf, "can't happen: ", sizeof(buf));
- strncat(buf, s, sizeof(buf)-1);
- rcerror(buf);
-}
-
-static void
-pushand(Reinst *f, Reinst *l)
-{
+static void pushand(Reinst *f, Reinst *l) {
if(andp >= &andstack[NSTACK])
cant("operand stack overflow");
andp->first = f;
andp++;
}
-static void
-pushator(int t)
-{
+/*
+ * Push operator t on the operator stack and store cursubid in the
+ * matching subidstack slot. A full stack is reported through cant().
+ */
+static void pushator(int t) {
if(atorp >= &atorstack[NSTACK])
cant("operator stack overflow");
*atorp++ = t;
*subidp++ = cursubid;
}
-static Node*
-popand(int op)
-{
+static Node* popand(int op) {
Reinst *inst;
if(andp <= &andstack[0]){
return --andp;
}
-static int
-popator(void)
-{
+/*
+ * Pop and return the top operator, stepping subidp back with it.
+ * An empty stack is reported through cant().
+ */
+static int popator(void) {
if(atorp <= &atorstack[0])
cant("operator stack underflow");
--subidp;
return *--atorp;
}
-static void
-evaluntil(int pri)
-{
+static void evaluntil(int pri) {
Node *op1, *op2;
Reinst *inst1, *inst2;
- while(pri==RBRA || atorp[-1]>=pri){
+ while(pri==RPAREN || atorp[-1]>=pri){
switch(popator()){
default:
rcerror("unknown operator in evaluntil");
break;
- case LBRA: /* must have been RBRA */
+ case LPAREN: /* must have been RPAREN */
op1 = popand('(');
- inst2 = newinst(RBRA);
+ inst2 = newinst(RPAREN);
inst2->r.subid = *subidp;
op1->last->l.next = inst2;
- inst1 = newinst(LBRA);
+ inst1 = newinst(LPAREN);
inst1->r.subid = *subidp;
inst1->l.next = op1->first;
pushand(inst1, inst2);
}
}
-static Reprog*
-optimize(Reprog *pp)
-{
+static Reprog* optimize(Reprog *pp) {
Reinst *inst, *target;
int size;
Reprog *npp;
/*
* get rid of NOOP chains
*/
- for(inst=pp->firstinst; inst->type!=END; inst++){
+ for(inst = pp->firstinst; inst->type != END; inst++){
target = inst->l.next;
while(target->type == NOP)
target = target->l.next;
return npp;
}
-#ifdef DEBUG
-static void
-dumpstack(void){
- Node *stk;
- int *ip;
-
- printf("operators\n");
- for(ip=atorstack; ip<atorp; ip++)
- printf("0%o\n", *ip);
- printf("operands\n");
- for(stk=andstack; stk<andp; stk++)
- printf("0%o\t0%o\n", stk->first->type, stk->last->type);
-}
-
-static void
-dump(Reprog *pp)
-{
- Reinst *l;
- Rune *p;
-
- l = pp->firstinst;
- do{
- printf("%d:\t0%o\t%d\t%d", (int)(l-pp->firstinst), l->type,
- (int)(l->l.left-pp->firstinst), (int)(l->r.right-pp->firstinst));
- if(l->type == RUNE)
- printf("\t%C\n", l->r.r);
- else if(l->type == CCLASS || l->type == NCCLASS){
- printf("\t[");
- if(l->type == NCCLASS)
- printf("^");
- for(p = l->r.cp->spans; p < l->r.cp->end; p += 2)
- if(p[0] == p[1])
- printf("%C", p[0]);
- else
- printf("%C-%C", p[0], p[1]);
- printf("]\n");
- } else
- printf("\n");
- }while(l++->type);
-}
-#endif
-
-static Reclass*
-newclass(void)
-{
+/*
+ * Return the next unused Reclass slot in classp and bump nclass.
+ * Fails through rcerror() once nelem(reprog->class) slots are in use.
+ */
+static Reclass* newclass(void) {
if(nclass >= nelem(reprog->class))
rcerror("too many character classes; increase Reprog.class size");
return &(classp[nclass++]);
}
-static int
-nextc(Rune *rp)
-{
- if(lexdone){
- *rp = 0;
- return 1;
- }
- exprp += chartorune(rp, exprp);
- if(*rp == '\\'){
- exprp += chartorune(rp, exprp);
- return 1;
- }
- if(*rp == 0)
- lexdone = 1;
- return 0;
-}
-
-static int
-lex(int literal, int dot_type)
-{
- int quoted;
-
- quoted = nextc(&yyrune);
- if(literal || quoted){
- if(yyrune == 0)
- return END;
- return RUNE;
- }
-
- switch(yyrune){
- case 0:
- return END;
- case '*':
- return STAR;
- case '?':
- return QUEST;
- case '+':
- return PLUS;
- case '|':
- return OR;
- case '.':
- return dot_type;
- case '(':
- return LBRA;
- case ')':
- return RBRA;
- case '^':
- return BOL;
- case '$':
- return EOL;
- case '[':
- return bldcclass();
- }
- return RUNE;
-}
-
-static int
-bldcclass(void)
-{
+static int bldcclass(void) {
int type;
- Rune r[NCCRUNE];
- Rune *p, *ep, *np;
- Rune rune;
+ int r[NCCRUNE];
+ int *p, *ep, *np;
+ int rune;
int quoted;
/* we have already seen the '[' */
return type;
}
-static Reprog*
-regcomp1(char *s, int literal, int dot_type)
-{
+static Reprog* regcomp1(char *s, int literal, int dot_type) {
int token;
- Reprog *volatile pp;
/* get memory for the program */
- pp = malloc(sizeof(Reprog) + 6*sizeof(Reinst)*strlen(s));
- if(pp == 0){
+ Reprog* volatile pp = malloc(sizeof(Reprog) + 6*sizeof(Reinst)*strlen(s));
+ if (pp == 0) {
regerror("out of memory");
return 0;
}
classp = pp->class;
errors = 0;
+ /* setup landing pad for fatal errors */
if(setjmp(regkaboom))
- goto out;
+ return (free(pp), NULL);
/* go compile the sucker */
lexdone = 0;
- exprp = s;
+ SrcExpr = s;
nclass = 0;
- nbra = 0;
+ nparens = 0;
atorp = atorstack;
andp = andstack;
subidp = subidstack;
- lastwasand = FALSE;
+ lastwasand = false;
cursubid = 0;
/* Start with a low priority operator to prime parser */
/* Force END */
operand(END);
evaluntil(START);
-#ifdef DEBUG
- dumpstack();
-#endif
- if(nbra)
+ if(nparens)
rcerror("unmatched left paren");
--andp; /* points to first and only operand */
pp->startinst = andp->first;
-#ifdef DEBUG
- dump(pp);
-#endif
pp = optimize(pp);
-#ifdef DEBUG
- printf("start: %d\n", (int)(andp->first-pp->firstinst));
- dump(pp);
-#endif
-out:
- if(errors){
- free(pp);
- pp = 0;
- }
return pp;
}
-extern Reprog*
-regcomp(char *s)
-{
+/*
+ * Compile s as a regular expression (literal = 0) with '.' lexed as ANY.
+ * Returns whatever regcomp1() returns; it yields 0/NULL on failure.
+ */
+Reprog* regcomp(char *s) {
return regcomp1(s, 0, ANY);
}
-extern Reprog*
-regcomplit(char *s)
-{
+/*
+ * Compile s in literal mode (literal = 1): the lexer returns every
+ * rune as an ordinary RUNE, so no character acts as an operator.
+ */
+Reprog* regcomplit(char *s) {
return regcomp1(s, 1, ANY);
}
-extern Reprog*
-regcompnl(char *s)
-{
+/*
+ * Like regcomp(), but '.' is lexed as ANYNL instead of ANY
+ * (presumably so that '.' also matches a newline - confirm in regexec).
+ */
+Reprog* regcompnl(char *s) {
return regcomp1(s, 0, ANYNL);
}
#define RUNE 0177
#define OPERATOR 0200 /* Bitmask of all operators */
#define START 0200 /* Start, used for marker on stack */
-#define RBRA 0201 /* Right bracket, ) */
-#define LBRA 0202 /* Left bracket, ( */
+#define RPAREN 0201 /* Right parenthesis, ) */
+#define LPAREN 0202 /* Left parenthesis, ( */
#define OR 0203 /* Alternation, | */
#define CAT 0204 /* Concatentation, implicit operator */
#define STAR 0205 /* Closure, * */
#define BOL 0303 /* Beginning of line, ^ */
#define EOL 0304 /* End of line, $ */
#define CCLASS 0305 /* Character class, [] */
-#define NCCLASS 0306 /* Negated character class, [] */
+#define NCCLASS 0306 /* Negated character class, [^] */
#define END 0377 /* Terminate: match found */
/*
Relist* relist[2];
Relist* reliste[2];
int starttype;
- Rune startchar;
+ int startchar;
char* starts;
char* eol;
- Rune* rstarts;
- Rune* reol;
+ int* rstarts;
+ int* reol;
};
extern Relist* _renewthread(Relist*, Reinst*, int, Resublist*);
extern void _renewmatch(Resub*, int, Resublist*);
extern Relist* _renewemptythread(Relist*, Reinst*, int, char*);
-extern Relist* _rrenewemptythread(Relist*, Reinst*, int, Rune*);
+extern Relist* _rrenewemptythread(Relist*, Reinst*, int, int*);
T4 = ((1<<(Bit4+1))-1) ^ 0xFF, /* 1111 0000 */
T5 = ((1<<(Bit5+1))-1) ^ 0xFF, /* 1111 1000 */
- Rune1 = (1<<(Bit1+0*Bitx))-1, /* 0000 0000 0000 0000 0111 1111 */
- Rune2 = (1<<(Bit2+1*Bitx))-1, /* 0000 0000 0000 0111 1111 1111 */
- Rune3 = (1<<(Bit3+2*Bitx))-1, /* 0000 0000 1111 1111 1111 1111 */
- Rune4 = (1<<(Bit4+3*Bitx))-1, /* 0011 1111 1111 1111 1111 1111 */
+ int1 = (1<<(Bit1+0*Bitx))-1, /* 0000 0000 0000 0000 0111 1111 */
+ int2 = (1<<(Bit2+1*Bitx))-1, /* 0000 0000 0000 0111 1111 1111 */
+ int3 = (1<<(Bit3+2*Bitx))-1, /* 0000 0000 1111 1111 1111 1111 */
+ int4 = (1<<(Bit4+3*Bitx))-1, /* 0011 1111 1111 1111 1111 1111 */
Maskx = (1<<Bitx)-1, /* 0011 1111 */
Testx = Maskx ^ 0xFF, /* 1100 0000 */
- Bad = Runeerror
+ Bad = interror
};
int
-chartorune(Rune *rune, char *str)
+chartorune(int *rune, char *str)
{
int c, c1, c2, c3;
long l;
if(c < T3) {
if(c < T2)
goto bad;
- l = ((c << Bitx) | c1) & Rune2;
- if(l <= Rune1)
+ l = ((c << Bitx) | c1) & int2;
+ if(l <= int1)
goto bad;
*rune = l;
return 2;
if(c2 & Testx)
goto bad;
if(c < T4) {
- l = ((((c << Bitx) | c1) << Bitx) | c2) & Rune3;
- if(l <= Rune2)
+ l = ((((c << Bitx) | c1) << Bitx) | c2) & int3;
+ if(l <= int2)
goto bad;
*rune = l;
return 3;
if(c3 & Testx)
goto bad;
if(c < T5) {
- l = ((((((c << Bitx) | c1) << Bitx) | c2) << Bitx) | c3) & Rune4;
- if(l <= Rune3)
+ l = ((((((c << Bitx) | c1) << Bitx) | c2) << Bitx) | c3) & int4;
+ if(l <= int3)
goto bad;
- if(l > Runemax)
+ if(l > intmax)
goto bad;
*rune = l;
return 4;
return 1;
}
-Rune*
-runestrchr(Rune *s, Rune c)
+int*
+runestrchr(int *s, int c)
{
- Rune c0 = c;
- Rune c1;
+ int c0 = c;
+ int c1;
if(c == 0) {
while(*s++)
}
char*
-utfrune(char *s, Rune c)
+utfrune(char *s, int c)
{
- Rune c1;
- Rune r;
+ int c1;
+ int r;
int n;
- if(c < Runesync) /* not part of utf sequence */
+ if(c < intsync) /* not part of utf sequence */
return strchr(s, c);
for(;;) {
c1 = *(unsigned char*)s;
- if(c1 < Runeself) { /* one byte rune */
+ if(c1 < intself) { /* one byte rune */
if(c1 == 0)
return 0;
if(c1 == c)