CC=gcc
-CFLAGS+=-Wall -Wextra -O3 -c -g
+CFLAGS += --std=c99 -pedantic -Wall -Wextra -O3 -c -g
O=o
LIB=libregexp9.a
extern void
_renewmatch(Resub *mp, int ms, Resublist *sp)
{
- int i;
+ int i;
- if(mp==0 || ms<=0)
- return;
- if(mp[0].s.sp==0 || sp->m[0].s.sp<mp[0].s.sp ||
- (sp->m[0].s.sp==mp[0].s.sp && sp->m[0].e.ep>mp[0].e.ep)){
- for(i=0; i<ms && i<NSUBEXP; i++)
- mp[i] = sp->m[i];
- for(; i<ms; i++)
- mp[i].s.sp = mp[i].e.ep = 0;
- }
+ if(mp==0 || ms<=0)
+ return;
+ if(mp[0].s.sp==0 || sp->m[0].s.sp<mp[0].s.sp ||
+ (sp->m[0].s.sp==mp[0].s.sp && sp->m[0].e.ep>mp[0].e.ep)){
+ for(i=0; i<ms && i<NSUBEXP; i++)
+ mp[i] = sp->m[i];
+ for(; i<ms; i++)
+ mp[i].s.sp = mp[i].e.ep = 0;
+ }
}
/*
* Note optimization in _renewthread:
- * *lp must be pending when _renewthread called; if *l has been looked
- * at already, the optimization is a bug.
+ * *lp must be pending when _renewthread called; if *l has been looked
+ * at already, the optimization is a bug.
*/
extern Relist*
-_renewthread(Relist *lp, /* _relist to add to */
- Reinst *ip, /* instruction to add */
- int ms,
- Resublist *sep) /* pointers to subexpressions */
+_renewthread(Relist *lp, /* _relist to add to */
+ Reinst *ip, /* instruction to add */
+ int ms,
+ Resublist *sep) /* pointers to subexpressions */
{
- Relist *p;
+ Relist *p;
- for(p=lp; p->inst; p++){
- if(p->inst == ip){
- if(sep->m[0].s.sp < p->se.m[0].s.sp){
- if(ms > 1)
- p->se = *sep;
- else
- p->se.m[0] = sep->m[0];
- }
- return 0;
- }
- }
- p->inst = ip;
- if(ms > 1)
- p->se = *sep;
- else
- p->se.m[0] = sep->m[0];
- (++p)->inst = 0;
- return p;
+ for(p=lp; p->inst; p++){
+ if(p->inst == ip){
+ if(sep->m[0].s.sp < p->se.m[0].s.sp){
+ if(ms > 1)
+ p->se = *sep;
+ else
+ p->se.m[0] = sep->m[0];
+ }
+ return 0;
+ }
+ }
+ p->inst = ip;
+ if(ms > 1)
+ p->se = *sep;
+ else
+ p->se.m[0] = sep->m[0];
+ (++p)->inst = 0;
+ return p;
}
/*
* initial empty start pointer.
*/
extern Relist*
-_renewemptythread(Relist *lp, /* _relist to add to */
- Reinst *ip, /* instruction to add */
- int ms,
- char *sp) /* pointers to subexpressions */
+_renewemptythread(Relist *lp, /* _relist to add to */
+ Reinst *ip, /* instruction to add */
+ int ms,
+ char *sp) /* pointers to subexpressions */
{
- Relist *p;
+ Relist *p;
- for(p=lp; p->inst; p++){
- if(p->inst == ip){
- if(sp < p->se.m[0].s.sp) {
- if(ms > 1)
- memset(&p->se, 0, sizeof(p->se));
- p->se.m[0].s.sp = sp;
- }
- return 0;
- }
- }
- p->inst = ip;
- if(ms > 1)
- memset(&p->se, 0, sizeof(p->se));
- p->se.m[0].s.sp = sp;
- (++p)->inst = 0;
- return p;
+ for(p=lp; p->inst; p++){
+ if(p->inst == ip){
+ if(sp < p->se.m[0].s.sp) {
+ if(ms > 1)
+ memset(&p->se, 0, sizeof(p->se));
+ p->se.m[0].s.sp = sp;
+ }
+ return 0;
+ }
+ }
+ p->inst = ip;
+ if(ms > 1)
+ memset(&p->se, 0, sizeof(p->se));
+ p->se.m[0].s.sp = sp;
+ (++p)->inst = 0;
+ return p;
}
extern Relist*
-_rrenewemptythread(Relist *lp, /* _relist to add to */
- Reinst *ip, /* instruction to add */
- int ms,
- Rune *rsp) /* pointers to subexpressions */
+_rrenewemptythread(Relist *lp, /* _relist to add to */
+ Reinst *ip, /* instruction to add */
+ int ms,
+ Rune *rsp) /* pointers to subexpressions */
{
- Relist *p;
+ Relist *p;
- for(p=lp; p->inst; p++){
- if(p->inst == ip){
- if(rsp < p->se.m[0].s.rsp) {
- if(ms > 1)
- memset(&p->se, 0, sizeof(p->se));
- p->se.m[0].s.rsp = rsp;
- }
- return 0;
- }
- }
- p->inst = ip;
- if(ms > 1)
- memset(&p->se, 0, sizeof(p->se));
- p->se.m[0].s.rsp = rsp;
- (++p)->inst = 0;
- return p;
+ for(p=lp; p->inst; p++){
+ if(p->inst == ip){
+ if(rsp < p->se.m[0].s.rsp) {
+ if(ms > 1)
+ memset(&p->se, 0, sizeof(p->se));
+ p->se.m[0].s.rsp = rsp;
+ }
+ return 0;
+ }
+ }
+ p->inst = ip;
+ if(ms > 1)
+ memset(&p->se, 0, sizeof(p->se));
+ p->se.m[0].s.rsp = rsp;
+ (++p)->inst = 0;
+ return p;
}
#include "regexp9.h"
#include "regcomp.h"
-#define TRUE 1
-#define FALSE 0
+#define TRUE 1
+#define FALSE 0
/*
* Parser Information
typedef
struct Node
{
- Reinst* first;
- Reinst* last;
+ Reinst* first;
+ Reinst* last;
}Node;
/* max character classes per program is nelem(reprog->class) */
-static Reprog *reprog;
+static Reprog *reprog;
/* max rune ranges per character class is nelem(classp->spans)/2 */
-#define NCCRUNE nelem(classp->spans)
-
-#define NSTACK 20
-static Node andstack[NSTACK];
-static Node *andp;
-static int atorstack[NSTACK];
-static int* atorp;
-static int cursubid; /* id of current subexpression */
-static int subidstack[NSTACK]; /* parallel to atorstack */
-static int* subidp;
-static int lastwasand; /* Last token was operand */
-static int nbra;
-static char* exprp; /* pointer to next character in source expression */
-static int lexdone;
-static unsigned int nclass;
-static Reclass*classp;
-static Reinst* freep;
-static int errors;
-static Rune yyrune; /* last lex'd rune */
-static Reclass*yyclassp; /* last lex'd class */
+#define NCCRUNE nelem(classp->spans)
+
+#define NSTACK 20
+static Node andstack[NSTACK];
+static Node *andp;
+static int atorstack[NSTACK];
+static int* atorp;
+static int cursubid; /* id of current subexpression */
+static int subidstack[NSTACK]; /* parallel to atorstack */
+static int* subidp;
+static int lastwasand; /* Last token was operand */
+static int nbra;
+static char* exprp; /* pointer to next character in source expression */
+static int lexdone;
+static unsigned int nclass;
+static Reclass*classp;
+static Reinst* freep;
+static int errors;
+static Rune yyrune; /* last lex'd rune */
+static Reclass*yyclassp; /* last lex'd class */
/* predeclared crap */
-static void operator(int);
-static void pushand(Reinst*, Reinst*);
-static void pushator(int);
-static void evaluntil(int);
-static int bldcclass(void);
+static void operator(int);
+static void pushand(Reinst*, Reinst*);
+static void pushator(int);
+static void evaluntil(int);
+static int bldcclass(void);
static jmp_buf regkaboom;
-static void
+static void
rcerror(char *s)
{
- errors++;
- regerror(s);
- longjmp(regkaboom, 1);
+ errors++;
+ regerror(s);
+ longjmp(regkaboom, 1);
}
-static Reinst*
+static Reinst*
newinst(int t)
{
- freep->type = t;
- freep->l.left = 0;
- freep->r.right = 0;
- return freep++;
+ freep->type = t;
+ freep->l.left = 0;
+ freep->r.right = 0;
+ return freep++;
}
-static void
+static void
operand(int t)
{
- Reinst *i;
+ Reinst *i;
- if(lastwasand)
- operator(CAT); /* catenate is implicit */
- i = newinst(t);
+ if(lastwasand)
+ operator(CAT); /* catenate is implicit */
+ i = newinst(t);
- if(t == CCLASS || t == NCCLASS)
- i->r.cp = yyclassp;
- if(t == RUNE)
- i->r.r = yyrune;
+ if(t == CCLASS || t == NCCLASS)
+ i->r.cp = yyclassp;
+ if(t == RUNE)
+ i->r.r = yyrune;
- pushand(i, i);
- lastwasand = TRUE;
+ pushand(i, i);
+ lastwasand = TRUE;
}
-static void
+static void
operator(int t)
{
- if(t==RBRA && --nbra<0)
- rcerror("unmatched right paren");
- if(t==LBRA){
- if(++cursubid >= NSUBEXP)
- rcerror("too many subexpressions");
- nbra++;
- if(lastwasand)
- operator(CAT);
- }else
- evaluntil(t);
- if(t != RBRA)
- pushator(t);
- lastwasand = FALSE;
- if(t==STAR || t==QUEST || t==PLUS || t==RBRA)
- lastwasand = TRUE; /* these look like operands */
+ if(t==RBRA && --nbra<0)
+ rcerror("unmatched right paren");
+ if(t==LBRA){
+ if(++cursubid >= NSUBEXP)
+ rcerror("too many subexpressions");
+ nbra++;
+ if(lastwasand)
+ operator(CAT);
+ }else
+ evaluntil(t);
+ if(t != RBRA)
+ pushator(t);
+ lastwasand = FALSE;
+ if(t==STAR || t==QUEST || t==PLUS || t==RBRA)
+ lastwasand = TRUE; /* these look like operands */
}
-static void
+static void
regerr2(char *s, int c)
{
- char buf[100];
- char *cp = buf;
- while(*s)
- *cp++ = *s++;
- *cp++ = c;
- *cp = '\0';
- rcerror(buf);
+ char buf[100];
+ char *cp = buf;
+ while(*s)
+ *cp++ = *s++;
+ *cp++ = c;
+ *cp = '\0';
+ rcerror(buf);
}
-static void
+static void
cant(char *s)
{
- char buf[100];
- strncpy(buf, "can't happen: ", sizeof(buf));
- strncat(buf, s, sizeof(buf)-1);
- rcerror(buf);
+ char buf[100];
+ strncpy(buf, "can't happen: ", sizeof(buf));
+ strncat(buf, s, sizeof(buf)-1);
+ rcerror(buf);
}
-static void
+static void
pushand(Reinst *f, Reinst *l)
{
- if(andp >= &andstack[NSTACK])
- cant("operand stack overflow");
- andp->first = f;
- andp->last = l;
- andp++;
+ if(andp >= &andstack[NSTACK])
+ cant("operand stack overflow");
+ andp->first = f;
+ andp->last = l;
+ andp++;
}
-static void
+static void
pushator(int t)
{
- if(atorp >= &atorstack[NSTACK])
- cant("operator stack overflow");
- *atorp++ = t;
- *subidp++ = cursubid;
+ if(atorp >= &atorstack[NSTACK])
+ cant("operator stack overflow");
+ *atorp++ = t;
+ *subidp++ = cursubid;
}
-static Node*
+static Node*
popand(int op)
{
- Reinst *inst;
-
- if(andp <= &andstack[0]){
- regerr2("missing operand for ", op);
- inst = newinst(NOP);
- pushand(inst,inst);
- }
- return --andp;
+ Reinst *inst;
+
+ if(andp <= &andstack[0]){
+ regerr2("missing operand for ", op);
+ inst = newinst(NOP);
+ pushand(inst,inst);
+ }
+ return --andp;
}
-static int
+static int
popator(void)
{
- if(atorp <= &atorstack[0])
- cant("operator stack underflow");
- --subidp;
- return *--atorp;
+ if(atorp <= &atorstack[0])
+ cant("operator stack underflow");
+ --subidp;
+ return *--atorp;
}
-static void
+static void
evaluntil(int pri)
{
- Node *op1, *op2;
- Reinst *inst1, *inst2;
-
- while(pri==RBRA || atorp[-1]>=pri){
- switch(popator()){
- default:
- rcerror("unknown operator in evaluntil");
- break;
- case LBRA: /* must have been RBRA */
- op1 = popand('(');
- inst2 = newinst(RBRA);
- inst2->r.subid = *subidp;
- op1->last->l.next = inst2;
- inst1 = newinst(LBRA);
- inst1->r.subid = *subidp;
- inst1->l.next = op1->first;
- pushand(inst1, inst2);
- return;
- case OR:
- op2 = popand('|');
- op1 = popand('|');
- inst2 = newinst(NOP);
- op2->last->l.next = inst2;
- op1->last->l.next = inst2;
- inst1 = newinst(OR);
- inst1->r.right = op1->first;
- inst1->l.left = op2->first;
- pushand(inst1, inst2);
- break;
- case CAT:
- op2 = popand(0);
- op1 = popand(0);
- op1->last->l.next = op2->first;
- pushand(op1->first, op2->last);
- break;
- case STAR:
- op2 = popand('*');
- inst1 = newinst(OR);
- op2->last->l.next = inst1;
- inst1->r.right = op2->first;
- pushand(inst1, inst1);
- break;
- case PLUS:
- op2 = popand('+');
- inst1 = newinst(OR);
- op2->last->l.next = inst1;
- inst1->r.right = op2->first;
- pushand(op2->first, inst1);
- break;
- case QUEST:
- op2 = popand('?');
- inst1 = newinst(OR);
- inst2 = newinst(NOP);
- inst1->l.left = inst2;
- inst1->r.right = op2->first;
- op2->last->l.next = inst2;
- pushand(inst1, inst2);
- break;
- }
- }
+ Node *op1, *op2;
+ Reinst *inst1, *inst2;
+
+ while(pri==RBRA || atorp[-1]>=pri){
+ switch(popator()){
+ default:
+ rcerror("unknown operator in evaluntil");
+ break;
+ case LBRA: /* must have been RBRA */
+ op1 = popand('(');
+ inst2 = newinst(RBRA);
+ inst2->r.subid = *subidp;
+ op1->last->l.next = inst2;
+ inst1 = newinst(LBRA);
+ inst1->r.subid = *subidp;
+ inst1->l.next = op1->first;
+ pushand(inst1, inst2);
+ return;
+ case OR:
+ op2 = popand('|');
+ op1 = popand('|');
+ inst2 = newinst(NOP);
+ op2->last->l.next = inst2;
+ op1->last->l.next = inst2;
+ inst1 = newinst(OR);
+ inst1->r.right = op1->first;
+ inst1->l.left = op2->first;
+ pushand(inst1, inst2);
+ break;
+ case CAT:
+ op2 = popand(0);
+ op1 = popand(0);
+ op1->last->l.next = op2->first;
+ pushand(op1->first, op2->last);
+ break;
+ case STAR:
+ op2 = popand('*');
+ inst1 = newinst(OR);
+ op2->last->l.next = inst1;
+ inst1->r.right = op2->first;
+ pushand(inst1, inst1);
+ break;
+ case PLUS:
+ op2 = popand('+');
+ inst1 = newinst(OR);
+ op2->last->l.next = inst1;
+ inst1->r.right = op2->first;
+ pushand(op2->first, inst1);
+ break;
+ case QUEST:
+ op2 = popand('?');
+ inst1 = newinst(OR);
+ inst2 = newinst(NOP);
+ inst1->l.left = inst2;
+ inst1->r.right = op2->first;
+ op2->last->l.next = inst2;
+ pushand(inst1, inst2);
+ break;
+ }
+ }
}
-static Reprog*
+static Reprog*
optimize(Reprog *pp)
{
- Reinst *inst, *target;
- int size;
- Reprog *npp;
- Reclass *cl;
- int diff;
-
- /*
- * get rid of NOOP chains
- */
- for(inst=pp->firstinst; inst->type!=END; inst++){
- target = inst->l.next;
- while(target->type == NOP)
- target = target->l.next;
- inst->l.next = target;
- }
-
- /*
- * The original allocation is for an area larger than
- * necessary. Reallocate to the actual space used
- * and then relocate the code.
- */
- size = sizeof(Reprog) + (freep - pp->firstinst)*sizeof(Reinst);
- npp = realloc(pp, size);
- if(npp==0 || npp==pp)
- return pp;
- diff = (char *)npp - (char *)pp;
- freep = (Reinst *)((char *)freep + diff);
- for(inst=npp->firstinst; inst<freep; inst++){
- switch(inst->type){
- case OR:
- case STAR:
- case PLUS:
- case QUEST:
- inst->r.right = (void*)((char*)inst->r.right + diff);
- break;
- case CCLASS:
- case NCCLASS:
- inst->r.right = (void*)((char*)inst->r.right + diff);
- cl = inst->r.cp;
- cl->end = (void*)((char*)cl->end + diff);
- break;
- }
- inst->l.left = (void*)((char*)inst->l.left + diff);
- }
- npp->startinst = (void*)((char*)npp->startinst + diff);
- return npp;
+ Reinst *inst, *target;
+ int size;
+ Reprog *npp;
+ Reclass *cl;
+ int diff;
+
+ /*
+ * get rid of NOOP chains
+ */
+ for(inst=pp->firstinst; inst->type!=END; inst++){
+ target = inst->l.next;
+ while(target->type == NOP)
+ target = target->l.next;
+ inst->l.next = target;
+ }
+
+ /*
+ * The original allocation is for an area larger than
+ * necessary. Reallocate to the actual space used
+ * and then relocate the code.
+ */
+ size = sizeof(Reprog) + (freep - pp->firstinst)*sizeof(Reinst);
+ npp = realloc(pp, size);
+ if(npp==0 || npp==pp)
+ return pp;
+ diff = (char *)npp - (char *)pp;
+ freep = (Reinst *)((char *)freep + diff);
+ for(inst=npp->firstinst; inst<freep; inst++){
+ switch(inst->type){
+ case OR:
+ case STAR:
+ case PLUS:
+ case QUEST:
+ inst->r.right = (void*)((char*)inst->r.right + diff);
+ break;
+ case CCLASS:
+ case NCCLASS:
+ inst->r.right = (void*)((char*)inst->r.right + diff);
+ cl = inst->r.cp;
+ cl->end = (void*)((char*)cl->end + diff);
+ break;
+ }
+ inst->l.left = (void*)((char*)inst->l.left + diff);
+ }
+ npp->startinst = (void*)((char*)npp->startinst + diff);
+ return npp;
}
-#ifdef DEBUG
-static void
+#ifdef DEBUG
+static void
dumpstack(void){
- Node *stk;
- int *ip;
-
- printf("operators\n");
- for(ip=atorstack; ip<atorp; ip++)
- printf("0%o\n", *ip);
- printf("operands\n");
- for(stk=andstack; stk<andp; stk++)
- printf("0%o\t0%o\n", stk->first->type, stk->last->type);
+ Node *stk;
+ int *ip;
+
+ printf("operators\n");
+ for(ip=atorstack; ip<atorp; ip++)
+ printf("0%o\n", *ip);
+ printf("operands\n");
+ for(stk=andstack; stk<andp; stk++)
+ printf("0%o\t0%o\n", stk->first->type, stk->last->type);
}
-static void
+static void
dump(Reprog *pp)
{
- Reinst *l;
- Rune *p;
-
- l = pp->firstinst;
- do{
- printf("%d:\t0%o\t%d\t%d", (int)(l-pp->firstinst), l->type,
- (int)(l->l.left-pp->firstinst), (int)(l->r.right-pp->firstinst));
- if(l->type == RUNE)
- printf("\t%C\n", l->r.r);
- else if(l->type == CCLASS || l->type == NCCLASS){
- printf("\t[");
- if(l->type == NCCLASS)
- printf("^");
- for(p = l->r.cp->spans; p < l->r.cp->end; p += 2)
- if(p[0] == p[1])
- printf("%C", p[0]);
- else
- printf("%C-%C", p[0], p[1]);
- printf("]\n");
- } else
- printf("\n");
- }while(l++->type);
+ Reinst *l;
+ Rune *p;
+
+ l = pp->firstinst;
+ do{
+ printf("%d:\t0%o\t%d\t%d", (int)(l-pp->firstinst), l->type,
+ (int)(l->l.left-pp->firstinst), (int)(l->r.right-pp->firstinst));
+ if(l->type == RUNE)
+ printf("\t%C\n", l->r.r);
+ else if(l->type == CCLASS || l->type == NCCLASS){
+ printf("\t[");
+ if(l->type == NCCLASS)
+ printf("^");
+ for(p = l->r.cp->spans; p < l->r.cp->end; p += 2)
+ if(p[0] == p[1])
+ printf("%C", p[0]);
+ else
+ printf("%C-%C", p[0], p[1]);
+ printf("]\n");
+ } else
+ printf("\n");
+ }while(l++->type);
}
#endif
-static Reclass*
+static Reclass*
newclass(void)
{
- if(nclass >= nelem(reprog->class))
- rcerror("too many character classes; increase Reprog.class size");
- return &(classp[nclass++]);
+ if(nclass >= nelem(reprog->class))
+ rcerror("too many character classes; increase Reprog.class size");
+ return &(classp[nclass++]);
}
-static int
+static int
nextc(Rune *rp)
{
- if(lexdone){
- *rp = 0;
- return 1;
- }
- exprp += chartorune(rp, exprp);
- if(*rp == '\\'){
- exprp += chartorune(rp, exprp);
- return 1;
- }
- if(*rp == 0)
- lexdone = 1;
- return 0;
+ if(lexdone){
+ *rp = 0;
+ return 1;
+ }
+ exprp += chartorune(rp, exprp);
+ if(*rp == '\\'){
+ exprp += chartorune(rp, exprp);
+ return 1;
+ }
+ if(*rp == 0)
+ lexdone = 1;
+ return 0;
}
-static int
+static int
lex(int literal, int dot_type)
{
- int quoted;
-
- quoted = nextc(&yyrune);
- if(literal || quoted){
- if(yyrune == 0)
- return END;
- return RUNE;
- }
-
- switch(yyrune){
- case 0:
- return END;
- case '*':
- return STAR;
- case '?':
- return QUEST;
- case '+':
- return PLUS;
- case '|':
- return OR;
- case '.':
- return dot_type;
- case '(':
- return LBRA;
- case ')':
- return RBRA;
- case '^':
- return BOL;
- case '$':
- return EOL;
- case '[':
- return bldcclass();
- }
- return RUNE;
+ int quoted;
+
+ quoted = nextc(&yyrune);
+ if(literal || quoted){
+ if(yyrune == 0)
+ return END;
+ return RUNE;
+ }
+
+ switch(yyrune){
+ case 0:
+ return END;
+ case '*':
+ return STAR;
+ case '?':
+ return QUEST;
+ case '+':
+ return PLUS;
+ case '|':
+ return OR;
+ case '.':
+ return dot_type;
+ case '(':
+ return LBRA;
+ case ')':
+ return RBRA;
+ case '^':
+ return BOL;
+ case '$':
+ return EOL;
+ case '[':
+ return bldcclass();
+ }
+ return RUNE;
}
static int
bldcclass(void)
{
- int type;
- Rune r[NCCRUNE];
- Rune *p, *ep, *np;
- Rune rune;
- int quoted;
-
- /* we have already seen the '[' */
- type = CCLASS;
- yyclassp = newclass();
-
- /* look ahead for negation */
- /* SPECIAL CASE!!! negated classes don't match \n */
- ep = r;
- quoted = nextc(&rune);
- if(!quoted && rune == '^'){
- type = NCCLASS;
- quoted = nextc(&rune);
- *ep++ = '\n';
- *ep++ = '\n';
- }
-
- /* parse class into a set of spans */
- while(ep < &r[NCCRUNE-1]){
- if(rune == 0){
- rcerror("malformed '[]'");
- return 0;
- }
- if(!quoted && rune == ']')
- break;
- if(!quoted && rune == '-'){
- if(ep == r){
- rcerror("malformed '[]'");
- return 0;
- }
- quoted = nextc(&rune);
- if((!quoted && rune == ']') || rune == 0){
- rcerror("malformed '[]'");
- return 0;
- }
- *(ep-1) = rune;
- } else {
- *ep++ = rune;
- *ep++ = rune;
- }
- quoted = nextc(&rune);
- }
- if(ep >= &r[NCCRUNE-1]) {
- rcerror("char class too large; increase Reclass.spans size");
- return 0;
- }
-
- /* sort on span start */
- for(p = r; p < ep; p += 2){
- for(np = p; np < ep; np += 2)
- if(*np < *p){
- rune = np[0];
- np[0] = p[0];
- p[0] = rune;
- rune = np[1];
- np[1] = p[1];
- p[1] = rune;
- }
- }
-
- /* merge spans */
- np = yyclassp->spans;
- p = r;
- if(r == ep)
- yyclassp->end = np;
- else {
- np[0] = *p++;
- np[1] = *p++;
- for(; p < ep; p += 2)
- /* overlapping or adjacent ranges? */
- if(p[0] <= np[1] + 1){
- if(p[1] >= np[1])
- np[1] = p[1]; /* coalesce */
- } else {
- np += 2;
- np[0] = p[0];
- np[1] = p[1];
- }
- yyclassp->end = np+2;
- }
-
- return type;
+ int type;
+ Rune r[NCCRUNE];
+ Rune *p, *ep, *np;
+ Rune rune;
+ int quoted;
+
+ /* we have already seen the '[' */
+ type = CCLASS;
+ yyclassp = newclass();
+
+ /* look ahead for negation */
+ /* SPECIAL CASE!!! negated classes don't match \n */
+ ep = r;
+ quoted = nextc(&rune);
+ if(!quoted && rune == '^'){
+ type = NCCLASS;
+ quoted = nextc(&rune);
+ *ep++ = '\n';
+ *ep++ = '\n';
+ }
+
+ /* parse class into a set of spans */
+ while(ep < &r[NCCRUNE-1]){
+ if(rune == 0){
+ rcerror("malformed '[]'");
+ return 0;
+ }
+ if(!quoted && rune == ']')
+ break;
+ if(!quoted && rune == '-'){
+ if(ep == r){
+ rcerror("malformed '[]'");
+ return 0;
+ }
+ quoted = nextc(&rune);
+ if((!quoted && rune == ']') || rune == 0){
+ rcerror("malformed '[]'");
+ return 0;
+ }
+ *(ep-1) = rune;
+ } else {
+ *ep++ = rune;
+ *ep++ = rune;
+ }
+ quoted = nextc(&rune);
+ }
+ if(ep >= &r[NCCRUNE-1]) {
+ rcerror("char class too large; increase Reclass.spans size");
+ return 0;
+ }
+
+ /* sort on span start */
+ for(p = r; p < ep; p += 2){
+ for(np = p; np < ep; np += 2)
+ if(*np < *p){
+ rune = np[0];
+ np[0] = p[0];
+ p[0] = rune;
+ rune = np[1];
+ np[1] = p[1];
+ p[1] = rune;
+ }
+ }
+
+ /* merge spans */
+ np = yyclassp->spans;
+ p = r;
+ if(r == ep)
+ yyclassp->end = np;
+ else {
+ np[0] = *p++;
+ np[1] = *p++;
+ for(; p < ep; p += 2)
+ /* overlapping or adjacent ranges? */
+ if(p[0] <= np[1] + 1){
+ if(p[1] >= np[1])
+ np[1] = p[1]; /* coalesce */
+ } else {
+ np += 2;
+ np[0] = p[0];
+ np[1] = p[1];
+ }
+ yyclassp->end = np+2;
+ }
+
+ return type;
}
-static Reprog*
+static Reprog*
regcomp1(char *s, int literal, int dot_type)
{
- int token;
- Reprog *volatile pp;
-
- /* get memory for the program */
- pp = malloc(sizeof(Reprog) + 6*sizeof(Reinst)*strlen(s));
- if(pp == 0){
- regerror("out of memory");
- return 0;
- }
- freep = pp->firstinst;
- classp = pp->class;
- errors = 0;
-
- if(setjmp(regkaboom))
- goto out;
-
- /* go compile the sucker */
- lexdone = 0;
- exprp = s;
- nclass = 0;
- nbra = 0;
- atorp = atorstack;
- andp = andstack;
- subidp = subidstack;
- lastwasand = FALSE;
- cursubid = 0;
-
- /* Start with a low priority operator to prime parser */
- pushator(START-1);
- while((token = lex(literal, dot_type)) != END){
- if((token&0300) == OPERATOR)
- operator(token);
- else
- operand(token);
- }
-
- /* Close with a low priority operator */
- evaluntil(START);
-
- /* Force END */
- operand(END);
- evaluntil(START);
+ int token;
+ Reprog *volatile pp;
+
+ /* get memory for the program */
+ pp = malloc(sizeof(Reprog) + 6*sizeof(Reinst)*strlen(s));
+ if(pp == 0){
+ regerror("out of memory");
+ return 0;
+ }
+ freep = pp->firstinst;
+ classp = pp->class;
+ errors = 0;
+
+ if(setjmp(regkaboom))
+ goto out;
+
+ /* go compile the sucker */
+ lexdone = 0;
+ exprp = s;
+ nclass = 0;
+ nbra = 0;
+ atorp = atorstack;
+ andp = andstack;
+ subidp = subidstack;
+ lastwasand = FALSE;
+ cursubid = 0;
+
+ /* Start with a low priority operator to prime parser */
+ pushator(START-1);
+ while((token = lex(literal, dot_type)) != END){
+ if((token&0300) == OPERATOR)
+ operator(token);
+ else
+ operand(token);
+ }
+
+ /* Close with a low priority operator */
+ evaluntil(START);
+
+ /* Force END */
+ operand(END);
+ evaluntil(START);
#ifdef DEBUG
- dumpstack();
+ dumpstack();
#endif
- if(nbra)
- rcerror("unmatched left paren");
- --andp; /* points to first and only operand */
- pp->startinst = andp->first;
+ if(nbra)
+ rcerror("unmatched left paren");
+ --andp; /* points to first and only operand */
+ pp->startinst = andp->first;
#ifdef DEBUG
- dump(pp);
+ dump(pp);
#endif
- pp = optimize(pp);
+ pp = optimize(pp);
#ifdef DEBUG
- printf("start: %d\n", (int)(andp->first-pp->firstinst));
- dump(pp);
+ printf("start: %d\n", (int)(andp->first-pp->firstinst));
+ dump(pp);
#endif
out:
- if(errors){
- free(pp);
- pp = 0;
- }
- return pp;
+ if(errors){
+ free(pp);
+ pp = 0;
+ }
+ return pp;
}
-extern Reprog*
+extern Reprog*
regcomp(char *s)
{
- return regcomp1(s, 0, ANY);
+ return regcomp1(s, 0, ANY);
}
-extern Reprog*
+extern Reprog*
regcomplit(char *s)
{
- return regcomp1(s, 1, ANY);
+ return regcomp1(s, 1, ANY);
}
-extern Reprog*
+extern Reprog*
regcompnl(char *s)
{
- return regcomp1(s, 0, ANYNL);
+ return regcomp1(s, 0, ANYNL);
}
void
regerror(char *s)
{
- char buf[132];
+ char buf[132];
- strncpy(buf, "regerror: ", sizeof(buf));
- strncat(buf, s, sizeof(buf)-1);
- strncat(buf, "\n", sizeof(buf)-1);
- write(2, buf, strlen(buf));
- exit(1);
+ strncpy(buf, "regerror: ", sizeof(buf));
+ strncat(buf, s, sizeof(buf)-1);
+ strncat(buf, "\n", sizeof(buf)-1);
+ write(2, buf, strlen(buf));
+ exit(1);
}
/*
- * return 0 if no match
- * >0 if a match
- * <0 if we ran out of _relist space
+ * return 0 if no match
+ * >0 if a match
+ * <0 if we ran out of _relist space
*/
static int
-regexec1(Reprog *progp, /* program to run */
- char *bol, /* string to run machine on */
- Resub *mp, /* subexpression elements */
- int ms, /* number of elements at mp */
- Reljunk *j
+regexec1(Reprog *progp, /* program to run */
+ char *bol, /* string to run machine on */
+ Resub *mp, /* subexpression elements */
+ int ms, /* number of elements at mp */
+ Reljunk *j
)
{
- int flag=0;
- Reinst *inst;
- Relist *tlp;
- char *s;
- int i, checkstart;
- Rune r, *rp, *ep;
- int n;
- Relist* tl; /* This list, next list */
- Relist* nl;
- Relist* tle; /* ends of this and next list */
- Relist* nle;
- int match;
- char *p;
+ int flag=0;
+ Reinst *inst;
+ Relist *tlp;
+ char *s;
+ int i, checkstart;
+ Rune r, *rp, *ep;
+ int n;
+ Relist* tl; /* This list, next list */
+ Relist* nl;
+ Relist* tle; /* ends of this and next list */
+ Relist* nle;
+ int match;
+ char *p;
- match = 0;
- checkstart = j->starttype;
- if(mp)
- for(i=0; i<ms; i++) {
- mp[i].s.sp = 0;
- mp[i].e.ep = 0;
- }
- j->relist[0][0].inst = 0;
- j->relist[1][0].inst = 0;
+ match = 0;
+ checkstart = j->starttype;
+ if(mp)
+ for(i=0; i<ms; i++) {
+ mp[i].s.sp = 0;
+ mp[i].e.ep = 0;
+ }
+ j->relist[0][0].inst = 0;
+ j->relist[1][0].inst = 0;
- /* Execute machine once for each character, including terminal NUL */
- s = j->starts;
- do{
- /* fast check for first char */
- if(checkstart) {
- switch(j->starttype) {
- case RUNE:
- p = utfrune(s, j->startchar);
- if(p == 0 || s == j->eol)
- return match;
- s = p;
- break;
- case BOL:
- if(s == bol)
- break;
- p = utfrune(s, '\n');
- if(p == 0 || s == j->eol)
- return match;
- s = p+1;
- break;
- }
- }
- r = *(unsigned char*)s;
- if(r < Runeself)
- n = 1;
- else
- n = chartorune(&r, s);
+ /* Execute machine once for each character, including terminal NUL */
+ s = j->starts;
+ do{
+ /* fast check for first char */
+ if(checkstart) {
+ switch(j->starttype) {
+ case RUNE:
+ p = utfrune(s, j->startchar);
+ if(p == 0 || s == j->eol)
+ return match;
+ s = p;
+ break;
+ case BOL:
+ if(s == bol)
+ break;
+ p = utfrune(s, '\n');
+ if(p == 0 || s == j->eol)
+ return match;
+ s = p+1;
+ break;
+ }
+ }
+ r = *(unsigned char*)s;
+ if(r < Runeself)
+ n = 1;
+ else
+ n = chartorune(&r, s);
- /* switch run lists */
- tl = j->relist[flag];
- tle = j->reliste[flag];
- nl = j->relist[flag^=1];
- nle = j->reliste[flag];
- nl->inst = 0;
+ /* switch run lists */
+ tl = j->relist[flag];
+ tle = j->reliste[flag];
+ nl = j->relist[flag^=1];
+ nle = j->reliste[flag];
+ nl->inst = 0;
- /* Add first instruction to current list */
- if(match == 0)
- _renewemptythread(tl, progp->startinst, ms, s);
+ /* Add first instruction to current list */
+ if(match == 0)
+ _renewemptythread(tl, progp->startinst, ms, s);
- /* Execute machine until current list is empty */
- for(tlp=tl; tlp->inst; tlp++){ /* assignment = */
- for(inst = tlp->inst; inst; inst = inst->l.next){
- switch(inst->type){
- case RUNE: /* regular character */
- if(inst->r.r == r){
- if(_renewthread(nl, inst->l.next, ms, &tlp->se)==nle)
- return -1;
- }
- break;
- case LBRA:
- tlp->se.m[inst->r.subid].s.sp = s;
- continue;
- case RBRA:
- tlp->se.m[inst->r.subid].e.ep = s;
- continue;
- case ANY:
- if(r != '\n')
- if(_renewthread(nl, inst->l.next, ms, &tlp->se)==nle)
- return -1;
- break;
- case ANYNL:
- if(_renewthread(nl, inst->l.next, ms, &tlp->se)==nle)
- return -1;
- break;
- case BOL:
- if(s == bol || *(s-1) == '\n')
- continue;
- break;
- case EOL:
- if(s == j->eol || r == 0 || r == '\n')
- continue;
- break;
- case CCLASS:
- ep = inst->r.cp->end;
- for(rp = inst->r.cp->spans; rp < ep; rp += 2)
- if(r >= rp[0] && r <= rp[1]){
- if(_renewthread(nl, inst->l.next, ms, &tlp->se)==nle)
- return -1;
- break;
- }
- break;
- case NCCLASS:
- ep = inst->r.cp->end;
- for(rp = inst->r.cp->spans; rp < ep; rp += 2)
- if(r >= rp[0] && r <= rp[1])
- break;
- if(rp == ep)
- if(_renewthread(nl, inst->l.next, ms, &tlp->se)==nle)
- return -1;
- break;
- case OR:
- /* evaluate right choice later */
- if(_renewthread(tlp, inst->r.right, ms, &tlp->se) == tle)
- return -1;
- /* efficiency: advance and re-evaluate */
- continue;
- case END: /* Match! */
- match = 1;
- tlp->se.m[0].e.ep = s;
- if(mp != 0)
- _renewmatch(mp, ms, &tlp->se);
- break;
- }
- break;
- }
- }
- if(s == j->eol)
- break;
- checkstart = j->starttype && nl->inst==0;
- s += n;
- }while(r);
- return match;
+ /* Execute machine until current list is empty */
+ for(tlp=tl; tlp->inst; tlp++){ /* assignment = */
+ for(inst = tlp->inst; inst; inst = inst->l.next){
+ switch(inst->type){
+ case RUNE: /* regular character */
+ if(inst->r.r == r){
+ if(_renewthread(nl, inst->l.next, ms, &tlp->se)==nle)
+ return -1;
+ }
+ break;
+ case LBRA:
+ tlp->se.m[inst->r.subid].s.sp = s;
+ continue;
+ case RBRA:
+ tlp->se.m[inst->r.subid].e.ep = s;
+ continue;
+ case ANY:
+ if(r != '\n')
+ if(_renewthread(nl, inst->l.next, ms, &tlp->se)==nle)
+ return -1;
+ break;
+ case ANYNL:
+ if(_renewthread(nl, inst->l.next, ms, &tlp->se)==nle)
+ return -1;
+ break;
+ case BOL:
+ if(s == bol || *(s-1) == '\n')
+ continue;
+ break;
+ case EOL:
+ if(s == j->eol || r == 0 || r == '\n')
+ continue;
+ break;
+ case CCLASS:
+ ep = inst->r.cp->end;
+ for(rp = inst->r.cp->spans; rp < ep; rp += 2)
+ if(r >= rp[0] && r <= rp[1]){
+ if(_renewthread(nl, inst->l.next, ms, &tlp->se)==nle)
+ return -1;
+ break;
+ }
+ break;
+ case NCCLASS:
+ ep = inst->r.cp->end;
+ for(rp = inst->r.cp->spans; rp < ep; rp += 2)
+ if(r >= rp[0] && r <= rp[1])
+ break;
+ if(rp == ep)
+ if(_renewthread(nl, inst->l.next, ms, &tlp->se)==nle)
+ return -1;
+ break;
+ case OR:
+ /* evaluate right choice later */
+ if(_renewthread(tlp, inst->r.right, ms, &tlp->se) == tle)
+ return -1;
+ /* efficiency: advance and re-evaluate */
+ continue;
+ case END: /* Match! */
+ match = 1;
+ tlp->se.m[0].e.ep = s;
+ if(mp != 0)
+ _renewmatch(mp, ms, &tlp->se);
+ break;
+ }
+ break;
+ }
+ }
+ if(s == j->eol)
+ break;
+ checkstart = j->starttype && nl->inst==0;
+ s += n;
+ }while(r);
+ return match;
}
static int
-regexec2(Reprog *progp, /* program to run */
- char *bol, /* string to run machine on */
- Resub *mp, /* subexpression elements */
- int ms, /* number of elements at mp */
- Reljunk *j
+regexec2(Reprog *progp, /* program to run */
+ char *bol, /* string to run machine on */
+ Resub *mp, /* subexpression elements */
+ int ms, /* number of elements at mp */
+ Reljunk *j
)
{
- int rv;
- Relist *relist0, *relist1;
+ int rv;
+ Relist *relist0, *relist1;
- /* mark space */
- relist0 = malloc(BIGLISTSIZE*sizeof(Relist));
- if(relist0 == NULL)
- return -1;
- relist1 = malloc(BIGLISTSIZE*sizeof(Relist));
- if(relist1 == NULL){
- free(relist0);
- return -1;
- }
- j->relist[0] = relist0;
- j->relist[1] = relist1;
- j->reliste[0] = relist0 + BIGLISTSIZE - 2;
- j->reliste[1] = relist1 + BIGLISTSIZE - 2;
+ /* mark space */
+ relist0 = malloc(BIGLISTSIZE*sizeof(Relist));
+ if(relist0 == NULL)
+ return -1;
+ relist1 = malloc(BIGLISTSIZE*sizeof(Relist));
+ if(relist1 == NULL){
+ free(relist0);
+ return -1;
+ }
+ j->relist[0] = relist0;
+ j->relist[1] = relist1;
+ j->reliste[0] = relist0 + BIGLISTSIZE - 2;
+ j->reliste[1] = relist1 + BIGLISTSIZE - 2;
- rv = regexec1(progp, bol, mp, ms, j);
- free(relist0);
- free(relist1);
- return rv;
+ rv = regexec1(progp, bol, mp, ms, j);
+ free(relist0);
+ free(relist1);
+ return rv;
}
extern int
-regexec(Reprog *progp, /* program to run */
- char *bol, /* string to run machine on */
- Resub *mp, /* subexpression elements */
- int ms) /* number of elements at mp */
+regexec(Reprog *progp, /* program to run */
+ char *bol, /* string to run machine on */
+ Resub *mp, /* subexpression elements */
+ int ms) /* number of elements at mp */
{
- Reljunk j;
- Relist relist0[LISTSIZE], relist1[LISTSIZE];
- int rv;
+ Reljunk j;
+ Relist relist0[LISTSIZE], relist1[LISTSIZE];
+ int rv;
- /*
- * use user-specified starting/ending location if specified
- */
- j.starts = bol;
- j.eol = 0;
- if(mp && ms>0){
- if(mp->s.sp)
- j.starts = mp->s.sp;
- if(mp->e.ep)
- j.eol = mp->e.ep;
- }
- j.starttype = 0;
- j.startchar = 0;
- if(progp->startinst->type == RUNE && progp->startinst->r.r < Runeself) {
- j.starttype = RUNE;
- j.startchar = progp->startinst->r.r;
- }
- if(progp->startinst->type == BOL)
- j.starttype = BOL;
+ /*
+ * use user-specified starting/ending location if specified
+ */
+ j.starts = bol;
+ j.eol = 0;
+ if(mp && ms>0){
+ if(mp->s.sp)
+ j.starts = mp->s.sp;
+ if(mp->e.ep)
+ j.eol = mp->e.ep;
+ }
+ j.starttype = 0;
+ j.startchar = 0;
+ if(progp->startinst->type == RUNE && progp->startinst->r.r < Runeself) {
+ j.starttype = RUNE;
+ j.startchar = progp->startinst->r.r;
+ }
+ if(progp->startinst->type == BOL)
+ j.starttype = BOL;
- /* mark space */
- j.relist[0] = relist0;
- j.relist[1] = relist1;
- j.reliste[0] = relist0 + nelem(relist0) - 2;
- j.reliste[1] = relist1 + nelem(relist1) - 2;
+ /* mark space */
+ j.relist[0] = relist0;
+ j.relist[1] = relist1;
+ j.reliste[0] = relist0 + nelem(relist0) - 2;
+ j.reliste[1] = relist1 + nelem(relist1) - 2;
- rv = regexec1(progp, bol, mp, ms, &j);
- if(rv >= 0)
- return rv;
- rv = regexec2(progp, bol, mp, ms, &j);
- if(rv >= 0)
- return rv;
- return -1;
+ rv = regexec1(progp, bol, mp, ms, &j);
+ if(rv >= 0)
+ return rv;
+ rv = regexec2(progp, bol, mp, ms, &j);
+ if(rv >= 0)
+ return rv;
+ return -1;
}
#ifndef _REGEXP9_H_
#define _REGEXP9_H_ 1
-#if defined(__cplusplus)
-extern "C" {
-#endif
-
-#ifdef AUTOLIB
-AUTOLIB(regexp9)
-#endif
#include "utf.h"
-typedef struct Resub Resub;
-typedef struct Reclass Reclass;
-typedef struct Reinst Reinst;
-typedef struct Reprog Reprog;
+typedef struct Reinst Reinst;
/*
- * Sub expression matches
+ * Sub expression matches
*/
-struct Resub{
- union
- {
- char *sp;
- Rune *rsp;
- }s;
- union
- {
- char *ep;
- Rune *rep;
- }e;
-};
+typedef struct Resub {
+ union
+ {
+ char *sp;
+ Rune *rsp;
+ }s;
+ union
+ {
+ char *ep;
+ Rune *rep;
+ }e;
+} Resub;
/*
- * character class, each pair of rune's defines a range
+ * character class, each pair of rune's defines a range
*/
-struct Reclass{
- Rune *end;
- Rune spans[64];
-};
+typedef struct Reclass {
+ Rune *end;
+ Rune spans[64];
+} Reclass;
/*
- * Machine instructions
+ * Machine instructions
*/
-struct Reinst{
- int type;
- union {
- Reclass *cp; /* class pointer */
- Rune r; /* character */
- int subid; /* sub-expression id for RBRA and LBRA */
- Reinst *right; /* right child of OR */
- }r;
- union { /* regexp relies on these two being in the same union */
- Reinst *left; /* left child of OR */
- Reinst *next; /* next instruction for CAT & LBRA */
- }l;
+struct Reinst {
+ int type;
+ union {
+ Reclass *cp; /* class pointer */
+ Rune r; /* character */
+ int subid; /* sub-expression id for RBRA and LBRA */
+ Reinst *right; /* right child of OR */
+ }r;
+ union { /* regexp relies on these two being in the same union */
+ Reinst *left; /* left child of OR */
+ Reinst *next; /* next instruction for CAT & LBRA */
+ }l;
};
/*
- * Reprogram definition
+ * Reprogram definition
*/
-struct Reprog{
- Reinst *startinst; /* start pc */
- Reclass class[16]; /* .data */
- Reinst firstinst[5]; /* .text */
-};
+typedef struct Reprog {
+ Reinst *startinst; /* start pc */
+ Reclass class[16]; /* .data */
+ Reinst firstinst[5]; /* .text */
+} Reprog;
-extern Reprog *regcomp9(char*);
-extern Reprog *regcomplit9(char*);
-extern Reprog *regcompnl9(char*);
-extern void regerror9(char*);
-extern int regexec9(Reprog*, char*, Resub*, int);
-extern void regsub9(char*, char*, int, Resub*, int);
+extern Reprog *regcomp9(char*);
+extern Reprog *regcomplit9(char*);
+extern Reprog *regcompnl9(char*);
+extern void regerror9(char*);
+extern int regexec9(Reprog*, char*, Resub*, int);
+extern void regsub9(char*, char*, int, Resub*, int);
-extern int rregexec9(Reprog*, Rune*, Resub*, int);
-extern void rregsub9(Rune*, Rune*, int, Resub*, int);
+extern int rregexec9(Reprog*, Rune*, Resub*, int);
+extern void rregsub9(Rune*, Rune*, int, Resub*, int);
/*
* Darwin simply cannot handle having routines that
#define rregsub rregsub9
#endif
-#if defined(__cplusplus)
-}
-#endif
#endif
#include "regexp9.h"
/* substitute into one string using the matches from the last regexec() */
-extern void
-regsub(char *sp, /* source string */
- char *dp, /* destination string */
- int dlen,
- Resub *mp, /* subexpression elements */
- int ms) /* number of elements pointed to by mp */
+extern void
+regsub(char *sp, /* source string */
+ char *dp, /* destination string */
+ int dlen,
+ Resub *mp, /* subexpression elements */
+ int ms) /* number of elements pointed to by mp */
{
- char *ssp, *ep;
- int i;
+ char *ssp, *ep;
+ int i;
- ep = dp+dlen-1;
- while(*sp != '\0'){
- if(*sp == '\\'){
- switch(*++sp){
- case '0':
- case '1':
- case '2':
- case '3':
- case '4':
- case '5':
- case '6':
- case '7':
- case '8':
- case '9':
- i = *sp-'0';
- if(mp!=0 && mp[i].s.sp != 0 && ms>i)
- for(ssp = mp[i].s.sp; ssp < mp[i].e.ep; ssp++)
- if(dp < ep)
- *dp++ = *ssp;
- break;
- case '\\':
- if(dp < ep)
- *dp++ = '\\';
- break;
- case '\0':
- sp--;
- break;
- default:
- if(dp < ep)
- *dp++ = *sp;
- break;
- }
- }else if(*sp == '&'){
- if(mp!=0 && mp[0].s.sp != 0 && ms>0)
- for(ssp = mp[0].s.sp; ssp < mp[0].e.ep; ssp++)
- if(dp < ep)
- *dp++ = *ssp;
- }else{
- if(dp < ep)
- *dp++ = *sp;
- }
- sp++;
- }
- *dp = '\0';
+ ep = dp+dlen-1;
+ while(*sp != '\0'){
+ if(*sp == '\\'){
+ switch(*++sp){
+ case '0':
+ case '1':
+ case '2':
+ case '3':
+ case '4':
+ case '5':
+ case '6':
+ case '7':
+ case '8':
+ case '9':
+ i = *sp-'0';
+ if(mp!=0 && mp[i].s.sp != 0 && ms>i)
+ for(ssp = mp[i].s.sp; ssp < mp[i].e.ep; ssp++)
+ if(dp < ep)
+ *dp++ = *ssp;
+ break;
+ case '\\':
+ if(dp < ep)
+ *dp++ = '\\';
+ break;
+ case '\0':
+ sp--;
+ break;
+ default:
+ if(dp < ep)
+ *dp++ = *sp;
+ break;
+ }
+ }else if(*sp == '&'){
+ if(mp!=0 && mp[0].s.sp != 0 && ms>0)
+ for(ssp = mp[0].s.sp; ssp < mp[0].e.ep; ssp++)
+ if(dp < ep)
+ *dp++ = *ssp;
+ }else{
+ if(dp < ep)
+ *dp++ = *sp;
+ }
+ sp++;
+ }
+ *dp = '\0';
}
#include "regcomp.h"
/*
- * return 0 if no match
- * >0 if a match
- * <0 if we ran out of _relist space
+ * return 0 if no match
+ * >0 if a match
+ * <0 if we ran out of _relist space
*/
static int
-rregexec1(Reprog *progp, /* program to run */
- Rune *bol, /* string to run machine on */
- Resub *mp, /* subexpression elements */
- int ms, /* number of elements at mp */
- Reljunk *j)
+rregexec1(Reprog *progp, /* program to run */
+ Rune *bol, /* string to run machine on */
+ Resub *mp, /* subexpression elements */
+ int ms, /* number of elements at mp */
+ Reljunk *j)
{
- int flag=0;
- Reinst *inst;
- Relist *tlp;
- Rune *s;
- int i, checkstart;
- Rune r, *rp, *ep;
- Relist* tl; /* This list, next list */
- Relist* nl;
- Relist* tle; /* ends of this and next list */
- Relist* nle;
- int match;
- Rune *p;
+ int flag=0;
+ Reinst *inst;
+ Relist *tlp;
+ Rune *s;
+ int i, checkstart;
+ Rune r, *rp, *ep;
+ Relist* tl; /* This list, next list */
+ Relist* nl;
+ Relist* tle; /* ends of this and next list */
+ Relist* nle;
+ int match;
+ Rune *p;
- match = 0;
- checkstart = j->startchar;
- if(mp)
- for(i=0; i<ms; i++) {
- mp[i].s.rsp = 0;
- mp[i].e.rep = 0;
- }
- j->relist[0][0].inst = 0;
- j->relist[1][0].inst = 0;
+ match = 0;
+ checkstart = j->startchar;
+ if(mp)
+ for(i=0; i<ms; i++) {
+ mp[i].s.rsp = 0;
+ mp[i].e.rep = 0;
+ }
+ j->relist[0][0].inst = 0;
+ j->relist[1][0].inst = 0;
- /* Execute machine once for each character, including terminal NUL */
- s = j->rstarts;
- do{
- /* fast check for first char */
- if(checkstart) {
- switch(j->starttype) {
- case RUNE:
- p = runestrchr(s, j->startchar);
- if(p == 0 || s == j->reol)
- return match;
- s = p;
- break;
- case BOL:
- if(s == bol)
- break;
- p = runestrchr(s, '\n');
- if(p == 0 || s == j->reol)
- return match;
- s = p+1;
- break;
- }
- }
+ /* Execute machine once for each character, including terminal NUL */
+ s = j->rstarts;
+ do{
+ /* fast check for first char */
+ if(checkstart) {
+ switch(j->starttype) {
+ case RUNE:
+ p = runestrchr(s, j->startchar);
+ if(p == 0 || s == j->reol)
+ return match;
+ s = p;
+ break;
+ case BOL:
+ if(s == bol)
+ break;
+ p = runestrchr(s, '\n');
+ if(p == 0 || s == j->reol)
+ return match;
+ s = p+1;
+ break;
+ }
+ }
- r = *s;
+ r = *s;
- /* switch run lists */
- tl = j->relist[flag];
- tle = j->reliste[flag];
- nl = j->relist[flag^=1];
- nle = j->reliste[flag];
- nl->inst = 0;
+ /* switch run lists */
+ tl = j->relist[flag];
+ tle = j->reliste[flag];
+ nl = j->relist[flag^=1];
+ nle = j->reliste[flag];
+ nl->inst = 0;
- /* Add first instruction to current list */
- _rrenewemptythread(tl, progp->startinst, ms, s);
+ /* Add first instruction to current list */
+ _rrenewemptythread(tl, progp->startinst, ms, s);
- /* Execute machine until current list is empty */
- for(tlp=tl; tlp->inst; tlp++){
- for(inst=tlp->inst; ; inst = inst->l.next){
- switch(inst->type){
- case RUNE: /* regular character */
- if(inst->r.r == r)
- if(_renewthread(nl, inst->l.next, ms, &tlp->se)==nle)
- return -1;
- break;
- case LBRA:
- tlp->se.m[inst->r.subid].s.rsp = s;
- continue;
- case RBRA:
- tlp->se.m[inst->r.subid].e.rep = s;
- continue;
- case ANY:
- if(r != '\n')
- if(_renewthread(nl, inst->l.next, ms, &tlp->se)==nle)
- return -1;
- break;
- case ANYNL:
- if(_renewthread(nl, inst->l.next, ms, &tlp->se)==nle)
- return -1;
- break;
- case BOL:
- if(s == bol || *(s-1) == '\n')
- continue;
- break;
- case EOL:
- if(s == j->reol || r == 0 || r == '\n')
- continue;
- break;
- case CCLASS:
- ep = inst->r.cp->end;
- for(rp = inst->r.cp->spans; rp < ep; rp += 2)
- if(r >= rp[0] && r <= rp[1]){
- if(_renewthread(nl, inst->l.next, ms, &tlp->se)==nle)
- return -1;
- break;
- }
- break;
- case NCCLASS:
- ep = inst->r.cp->end;
- for(rp = inst->r.cp->spans; rp < ep; rp += 2)
- if(r >= rp[0] && r <= rp[1])
- break;
- if(rp == ep)
- if(_renewthread(nl, inst->l.next, ms, &tlp->se)==nle)
- return -1;
- break;
- case OR:
- /* evaluate right choice later */
- if(_renewthread(tlp, inst->r.right, ms, &tlp->se) == tle)
- return -1;
- /* efficiency: advance and re-evaluate */
- continue;
- case END: /* Match! */
- match = 1;
- tlp->se.m[0].e.rep = s;
- if(mp != 0)
- _renewmatch(mp, ms, &tlp->se);
- break;
- }
- break;
- }
- }
- if(s == j->reol)
- break;
- checkstart = j->startchar && nl->inst==0;
- s++;
- }while(r);
- return match;
+ /* Execute machine until current list is empty */
+ for(tlp=tl; tlp->inst; tlp++){
+ for(inst=tlp->inst; ; inst = inst->l.next){
+ switch(inst->type){
+ case RUNE: /* regular character */
+ if(inst->r.r == r)
+ if(_renewthread(nl, inst->l.next, ms, &tlp->se)==nle)
+ return -1;
+ break;
+ case LBRA:
+ tlp->se.m[inst->r.subid].s.rsp = s;
+ continue;
+ case RBRA:
+ tlp->se.m[inst->r.subid].e.rep = s;
+ continue;
+ case ANY:
+ if(r != '\n')
+ if(_renewthread(nl, inst->l.next, ms, &tlp->se)==nle)
+ return -1;
+ break;
+ case ANYNL:
+ if(_renewthread(nl, inst->l.next, ms, &tlp->se)==nle)
+ return -1;
+ break;
+ case BOL:
+ if(s == bol || *(s-1) == '\n')
+ continue;
+ break;
+ case EOL:
+ if(s == j->reol || r == 0 || r == '\n')
+ continue;
+ break;
+ case CCLASS:
+ ep = inst->r.cp->end;
+ for(rp = inst->r.cp->spans; rp < ep; rp += 2)
+ if(r >= rp[0] && r <= rp[1]){
+ if(_renewthread(nl, inst->l.next, ms, &tlp->se)==nle)
+ return -1;
+ break;
+ }
+ break;
+ case NCCLASS:
+ ep = inst->r.cp->end;
+ for(rp = inst->r.cp->spans; rp < ep; rp += 2)
+ if(r >= rp[0] && r <= rp[1])
+ break;
+ if(rp == ep)
+ if(_renewthread(nl, inst->l.next, ms, &tlp->se)==nle)
+ return -1;
+ break;
+ case OR:
+ /* evaluate right choice later */
+ if(_renewthread(tlp, inst->r.right, ms, &tlp->se) == tle)
+ return -1;
+ /* efficiency: advance and re-evaluate */
+ continue;
+ case END: /* Match! */
+ match = 1;
+ tlp->se.m[0].e.rep = s;
+ if(mp != 0)
+ _renewmatch(mp, ms, &tlp->se);
+ break;
+ }
+ break;
+ }
+ }
+ if(s == j->reol)
+ break;
+ checkstart = j->startchar && nl->inst==0;
+ s++;
+ }while(r);
+ return match;
}
static int
-rregexec2(Reprog *progp, /* program to run */
- Rune *bol, /* string to run machine on */
- Resub *mp, /* subexpression elements */
- int ms, /* number of elements at mp */
- Reljunk *j
+rregexec2(Reprog *progp, /* program to run */
+ Rune *bol, /* string to run machine on */
+ Resub *mp, /* subexpression elements */
+ int ms, /* number of elements at mp */
+ Reljunk *j
)
{
- Relist relist0[5*LISTSIZE], relist1[5*LISTSIZE];
+ Relist relist0[5*LISTSIZE], relist1[5*LISTSIZE];
- /* mark space */
- j->relist[0] = relist0;
- j->relist[1] = relist1;
- j->reliste[0] = relist0 + nelem(relist0) - 2;
- j->reliste[1] = relist1 + nelem(relist1) - 2;
+ /* mark space */
+ j->relist[0] = relist0;
+ j->relist[1] = relist1;
+ j->reliste[0] = relist0 + nelem(relist0) - 2;
+ j->reliste[1] = relist1 + nelem(relist1) - 2;
- return rregexec1(progp, bol, mp, ms, j);
+ return rregexec1(progp, bol, mp, ms, j);
}
extern int
-rregexec(Reprog *progp, /* program to run */
- Rune *bol, /* string to run machine on */
- Resub *mp, /* subexpression elements */
- int ms) /* number of elements at mp */
+rregexec(Reprog *progp, /* program to run */
+ Rune *bol, /* string to run machine on */
+ Resub *mp, /* subexpression elements */
+ int ms) /* number of elements at mp */
{
- Reljunk j;
- Relist relist0[LISTSIZE], relist1[LISTSIZE];
- int rv;
+ Reljunk j;
+ Relist relist0[LISTSIZE], relist1[LISTSIZE];
+ int rv;
- /*
- * use user-specified starting/ending location if specified
- */
- j.rstarts = bol;
- j.reol = 0;
- if(mp && ms>0){
- if(mp->s.sp)
- j.rstarts = mp->s.rsp;
- if(mp->e.ep)
- j.reol = mp->e.rep;
- }
- j.starttype = 0;
- j.startchar = 0;
- if(progp->startinst->type == RUNE && progp->startinst->r.r < Runeself) {
- j.starttype = RUNE;
- j.startchar = progp->startinst->r.r;
- }
- if(progp->startinst->type == BOL)
- j.starttype = BOL;
+ /*
+ * use user-specified starting/ending location if specified
+ */
+ j.rstarts = bol;
+ j.reol = 0;
+ if(mp && ms>0){
+ if(mp->s.sp)
+ j.rstarts = mp->s.rsp;
+ if(mp->e.ep)
+ j.reol = mp->e.rep;
+ }
+ j.starttype = 0;
+ j.startchar = 0;
+ if(progp->startinst->type == RUNE && progp->startinst->r.r < Runeself) {
+ j.starttype = RUNE;
+ j.startchar = progp->startinst->r.r;
+ }
+ if(progp->startinst->type == BOL)
+ j.starttype = BOL;
- /* mark space */
- j.relist[0] = relist0;
- j.relist[1] = relist1;
- j.reliste[0] = relist0 + nelem(relist0) - 2;
- j.reliste[1] = relist1 + nelem(relist1) - 2;
+ /* mark space */
+ j.relist[0] = relist0;
+ j.relist[1] = relist1;
+ j.reliste[0] = relist0 + nelem(relist0) - 2;
+ j.reliste[1] = relist1 + nelem(relist1) - 2;
- rv = rregexec1(progp, bol, mp, ms, &j);
- if(rv >= 0)
- return rv;
- rv = rregexec2(progp, bol, mp, ms, &j);
- if(rv >= 0)
- return rv;
- return -1;
+ rv = rregexec1(progp, bol, mp, ms, &j);
+ if(rv >= 0)
+ return rv;
+ rv = rregexec2(progp, bol, mp, ms, &j);
+ if(rv >= 0)
+ return rv;
+ return -1;
}
#include "regexp9.h"
/* substitute into one string using the matches from the last regexec() */
-extern void
-rregsub(Rune *sp, /* source string */
- Rune *dp, /* destination string */
- int dlen,
- Resub *mp, /* subexpression elements */
- int ms) /* number of elements pointed to by mp */
+extern void
+rregsub(Rune *sp, /* source string */
+ Rune *dp, /* destination string */
+ int dlen,
+ Resub *mp, /* subexpression elements */
+ int ms) /* number of elements pointed to by mp */
{
- Rune *ssp, *ep;
- int i;
+ Rune *ssp, *ep;
+ int i;
- ep = dp+(dlen/sizeof(Rune))-1;
- while(*sp != '\0'){
- if(*sp == '\\'){
- switch(*++sp){
- case '0':
- case '1':
- case '2':
- case '3':
- case '4':
- case '5':
- case '6':
- case '7':
- case '8':
- case '9':
- i = *sp-'0';
- if(mp!=0 && mp[i].s.rsp != 0 && ms>i)
- for(ssp = mp[i].s.rsp;
- ssp < mp[i].e.rep;
- ssp++)
- if(dp < ep)
- *dp++ = *ssp;
- break;
- case '\\':
- if(dp < ep)
- *dp++ = '\\';
- break;
- case '\0':
- sp--;
- break;
- default:
- if(dp < ep)
- *dp++ = *sp;
- break;
- }
- }else if(*sp == '&'){
- if(mp!=0 && mp[0].s.rsp != 0 && ms>0)
- for(ssp = mp[0].s.rsp;
- ssp < mp[0].e.rep; ssp++)
- if(dp < ep)
- *dp++ = *ssp;
- }else{
- if(dp < ep)
- *dp++ = *sp;
- }
- sp++;
- }
- *dp = '\0';
+ ep = dp+(dlen/sizeof(Rune))-1;
+ while(*sp != '\0'){
+ if(*sp == '\\'){
+ switch(*++sp){
+ case '0':
+ case '1':
+ case '2':
+ case '3':
+ case '4':
+ case '5':
+ case '6':
+ case '7':
+ case '8':
+ case '9':
+ i = *sp-'0';
+ if(mp!=0 && mp[i].s.rsp != 0 && ms>i)
+ for(ssp = mp[i].s.rsp;
+ ssp < mp[i].e.rep;
+ ssp++)
+ if(dp < ep)
+ *dp++ = *ssp;
+ break;
+ case '\\':
+ if(dp < ep)
+ *dp++ = '\\';
+ break;
+ case '\0':
+ sp--;
+ break;
+ default:
+ if(dp < ep)
+ *dp++ = *sp;
+ break;
+ }
+ }else if(*sp == '&'){
+ if(mp!=0 && mp[0].s.rsp != 0 && ms>0)
+ for(ssp = mp[0].s.rsp;
+ ssp < mp[0].e.rep; ssp++)
+ if(dp < ep)
+ *dp++ = *ssp;
+ }else{
+ if(dp < ep)
+ *dp++ = *sp;
+ }
+ sp++;
+ }
+ *dp = '\0';
}
struct x
{
- char *re;
- char *s;
- Reprog *p;
+ char *re;
+ char *s;
+ Reprog *p;
};
struct x t[] = {
- { "^[^!@]+$", "/bin/upas/aliasmail '&'", 0 },
- { "^local!(.*)$", "/mail/box/\\1/mbox", 0 },
- { "^plan9!(.*)$", "\\1", 0 },
- { "^helix!(.*)$", "\\1", 0 },
- { "^([^!]+)@([^!@]+)$", "\\2!\\1", 0 },
- { "^(uk\\.[^!]*)(!.*)$", "/bin/upas/uk2uk '\\1' '\\2'", 0 },
- { "^[^!]*\\.[^!]*!.*$", "inet!&", 0 },
- { "^\xE2\x98\xBA$", "smiley", 0 },
- { "^(coma|research|pipe|pyxis|inet|hunny|gauss)!(.*)$", "/mail/lib/qmail '\\s' 'net!\\1' '\\2'", 0 },
- { "^.*$", "/mail/lib/qmail '\\s' 'net!research' '&'", 0 },
- { 0, 0, 0 },
+ { "^[^!@]+$", "/bin/upas/aliasmail '&'", 0 },
+ { "^local!(.*)$", "/mail/box/\\1/mbox", 0 },
+ { "^plan9!(.*)$", "\\1", 0 },
+ { "^helix!(.*)$", "\\1", 0 },
+ { "^([^!]+)@([^!@]+)$", "\\2!\\1", 0 },
+ { "^(uk\\.[^!]*)(!.*)$", "/bin/upas/uk2uk '\\1' '\\2'", 0 },
+ { "^[^!]*\\.[^!]*!.*$", "inet!&", 0 },
+ { "^\xE2\x98\xBA$", "smiley", 0 },
+ { "^(coma|research|pipe|pyxis|inet|hunny|gauss)!(.*)$", "/mail/lib/qmail '\\s' 'net!\\1' '\\2'", 0 },
+ { "^.*$", "/mail/lib/qmail '\\s' 'net!research' '&'", 0 },
+ { 0, 0, 0 },
};
int
main(int ac, char **av)
{
- Resub rs[10];
- char dst[128];
- struct x *tp;
+ Resub rs[10];
+ char dst[128];
+ struct x *tp;
- if(ac != 2)
- exit(1);
+ if(ac != 2)
+ exit(1);
- for(tp = t; tp->re; tp++)
- tp->p = regcomp(tp->re);
+ for(tp = t; tp->re; tp++)
+ tp->p = regcomp(tp->re);
- for(tp = t; tp->re; tp++){
- printf("%s VIA %s", av[1], tp->re);
- memset(rs, 0, sizeof rs);
- if(regexec(tp->p, av[1], rs, 10)){
- regsub(tp->s, dst, sizeof dst, rs, 10);
- printf(" sub %s -> %s", tp->s, dst);
- }
- printf("\n");
- }
- exit(0);
+ for(tp = t; tp->re; tp++){
+ printf("%s VIA %s", av[1], tp->re);
+ memset(rs, 0, sizeof rs);
+ if(regexec(tp->p, av[1], rs, 10)){
+ regsub(tp->s, dst, sizeof dst, rs, 10);
+ printf(" sub %s -> %s", tp->s, dst);
+ }
+ printf("\n");
+ }
+ exit(0);
}
int
main()
{
- Resub rs[10];
- Reprog *p;
- char *s;
+ Resub rs[10];
+ Reprog *p;
+ char *s;
- p = regcomp("[^a-z]");
- s = "\n";
- if(regexec(p, s, rs, 10))
- printf("%s %p %p %p\n", s, s, rs[0].s.sp, rs[0].e.ep);
- s = "0";
- if(regexec(p, s, rs, 10))
- printf("%s %p %p %p\n", s, s, rs[0].s.sp, rs[0].e.ep);
- exit(0);
+ p = regcomp("[^a-z]");
+ s = "\n";
+ if(regexec(p, s, rs, 10))
+ printf("%s %p %p %p\n", s, s, rs[0].s.sp, rs[0].e.ep);
+ s = "0";
+ if(regexec(p, s, rs, 10))
+ printf("%s %p %p %p\n", s, s, rs[0].s.sp, rs[0].e.ep);
+ exit(0);
}
enum
{
- Bit1 = 7,
- Bitx = 6,
- Bit2 = 5,
- Bit3 = 4,
- Bit4 = 3,
- Bit5 = 2,
-
- T1 = ((1<<(Bit1+1))-1) ^ 0xFF, /* 0000 0000 */
- Tx = ((1<<(Bitx+1))-1) ^ 0xFF, /* 1000 0000 */
- T2 = ((1<<(Bit2+1))-1) ^ 0xFF, /* 1100 0000 */
- T3 = ((1<<(Bit3+1))-1) ^ 0xFF, /* 1110 0000 */
- T4 = ((1<<(Bit4+1))-1) ^ 0xFF, /* 1111 0000 */
- T5 = ((1<<(Bit5+1))-1) ^ 0xFF, /* 1111 1000 */
-
- Rune1 = (1<<(Bit1+0*Bitx))-1, /* 0000 0000 0000 0000 0111 1111 */
- Rune2 = (1<<(Bit2+1*Bitx))-1, /* 0000 0000 0000 0111 1111 1111 */
- Rune3 = (1<<(Bit3+2*Bitx))-1, /* 0000 0000 1111 1111 1111 1111 */
- Rune4 = (1<<(Bit4+3*Bitx))-1, /* 0011 1111 1111 1111 1111 1111 */
-
- Maskx = (1<<Bitx)-1, /* 0011 1111 */
- Testx = Maskx ^ 0xFF, /* 1100 0000 */
-
- Bad = Runeerror
+ Bit1 = 7,
+ Bitx = 6,
+ Bit2 = 5,
+ Bit3 = 4,
+ Bit4 = 3,
+ Bit5 = 2,
+
+ T1 = ((1<<(Bit1+1))-1) ^ 0xFF, /* 0000 0000 */
+ Tx = ((1<<(Bitx+1))-1) ^ 0xFF, /* 1000 0000 */
+ T2 = ((1<<(Bit2+1))-1) ^ 0xFF, /* 1100 0000 */
+ T3 = ((1<<(Bit3+1))-1) ^ 0xFF, /* 1110 0000 */
+ T4 = ((1<<(Bit4+1))-1) ^ 0xFF, /* 1111 0000 */
+ T5 = ((1<<(Bit5+1))-1) ^ 0xFF, /* 1111 1000 */
+
+ Rune1 = (1<<(Bit1+0*Bitx))-1, /* 0000 0000 0000 0000 0111 1111 */
+ Rune2 = (1<<(Bit2+1*Bitx))-1, /* 0000 0000 0000 0111 1111 1111 */
+ Rune3 = (1<<(Bit3+2*Bitx))-1, /* 0000 0000 1111 1111 1111 1111 */
+ Rune4 = (1<<(Bit4+3*Bitx))-1, /* 0011 1111 1111 1111 1111 1111 */
+
+ Maskx = (1<<Bitx)-1, /* 0011 1111 */
+ Testx = Maskx ^ 0xFF, /* 1100 0000 */
+
+ Bad = Runeerror
};
int
chartorune(Rune *rune, char *str)
{
- int c, c1, c2, c3;
- long l;
-
- /*
- * one character sequence
- * 00000-0007F => T1
- */
- c = *(unsigned char*)str;
- if(c < Tx) {
- *rune = c;
- return 1;
- }
-
- /*
- * two character sequence
- * 0080-07FF => T2 Tx
- */
- c1 = *(unsigned char*)(str+1) ^ Tx;
- if(c1 & Testx)
- goto bad;
- if(c < T3) {
- if(c < T2)
- goto bad;
- l = ((c << Bitx) | c1) & Rune2;
- if(l <= Rune1)
- goto bad;
- *rune = l;
- return 2;
- }
-
- /*
- * three character sequence
- * 0800-FFFF => T3 Tx Tx
- */
- c2 = *(unsigned char*)(str+2) ^ Tx;
- if(c2 & Testx)
- goto bad;
- if(c < T4) {
- l = ((((c << Bitx) | c1) << Bitx) | c2) & Rune3;
- if(l <= Rune2)
- goto bad;
- *rune = l;
- return 3;
- }
-
- /*
- * four character sequence
- * 10000-10FFFF => T4 Tx Tx Tx
- */
- if(UTFmax >= 4) {
- c3 = *(unsigned char*)(str+3) ^ Tx;
- if(c3 & Testx)
- goto bad;
- if(c < T5) {
- l = ((((((c << Bitx) | c1) << Bitx) | c2) << Bitx) | c3) & Rune4;
- if(l <= Rune3)
- goto bad;
- if(l > Runemax)
- goto bad;
- *rune = l;
- return 4;
- }
- }
-
- /*
- * bad decoding
- */
+ int c, c1, c2, c3;
+ long l;
+
+ /*
+ * one character sequence
+ * 00000-0007F => T1
+ */
+ c = *(unsigned char*)str;
+ if(c < Tx) {
+ *rune = c;
+ return 1;
+ }
+
+ /*
+ * two character sequence
+ * 0080-07FF => T2 Tx
+ */
+ c1 = *(unsigned char*)(str+1) ^ Tx;
+ if(c1 & Testx)
+ goto bad;
+ if(c < T3) {
+ if(c < T2)
+ goto bad;
+ l = ((c << Bitx) | c1) & Rune2;
+ if(l <= Rune1)
+ goto bad;
+ *rune = l;
+ return 2;
+ }
+
+ /*
+ * three character sequence
+ * 0800-FFFF => T3 Tx Tx
+ */
+ c2 = *(unsigned char*)(str+2) ^ Tx;
+ if(c2 & Testx)
+ goto bad;
+ if(c < T4) {
+ l = ((((c << Bitx) | c1) << Bitx) | c2) & Rune3;
+ if(l <= Rune2)
+ goto bad;
+ *rune = l;
+ return 3;
+ }
+
+ /*
+ * four character sequence
+ * 10000-10FFFF => T4 Tx Tx Tx
+ */
+ if(UTFmax >= 4) {
+ c3 = *(unsigned char*)(str+3) ^ Tx;
+ if(c3 & Testx)
+ goto bad;
+ if(c < T5) {
+ l = ((((((c << Bitx) | c1) << Bitx) | c2) << Bitx) | c3) & Rune4;
+ if(l <= Rune3)
+ goto bad;
+ if(l > Runemax)
+ goto bad;
+ *rune = l;
+ return 4;
+ }
+ }
+
+ /*
+ * bad decoding
+ */
bad:
- *rune = Bad;
- return 1;
+ *rune = Bad;
+ return 1;
}
Rune*
runestrchr(Rune *s, Rune c)
{
- Rune c0 = c;
- Rune c1;
-
- if(c == 0) {
- while(*s++)
- ;
- return s-1;
- }
-
- while((c1 = *s++))
- if(c1 == c0)
- return s-1;
- return 0;
+ Rune c0 = c;
+ Rune c1;
+
+ if(c == 0) {
+ while(*s++)
+ ;
+ return s-1;
+ }
+
+ while((c1 = *s++))
+ if(c1 == c0)
+ return s-1;
+ return 0;
}
char*
utfrune(char *s, Rune c)
{
- Rune c1;
- Rune r;
- int n;
-
- if(c < Runesync) /* not part of utf sequence */
- return strchr(s, c);
-
- for(;;) {
- c1 = *(unsigned char*)s;
- if(c1 < Runeself) { /* one byte rune */
- if(c1 == 0)
- return 0;
- if(c1 == c)
- return s;
- s++;
- continue;
- }
- n = chartorune(&r, s);
- if(r == c)
- return s;
- s += n;
- }
+ Rune c1;
+ Rune r;
+ int n;
+
+ if(c < Runesync) /* not part of utf sequence */
+ return strchr(s, c);
+
+ for(;;) {
+ c1 = *(unsigned char*)s;
+ if(c1 < Runeself) { /* one byte rune */
+ if(c1 == 0)
+ return 0;
+ if(c1 == c)
+ return s;
+ s++;
+ continue;
+ }
+ n = chartorune(&r, s);
+ if(r == c)
+ return s;
+ s += n;
+ }
}