--- /dev/null
+# Makefile for libregexp9.a, a static library built from the regexp and
+# UTF sources listed in OFILES.
+CC=gcc
+# -c is passed by the compile rule itself, so overriding CFLAGS on the
+# command line cannot turn object builds into link attempts.
+CFLAGS+=-Wall -Wextra -O3 -g
+O=o
+LIB=libregexp9.a
+
+# Installation root; override with "make PREFIX=/some/dir install".
+PREFIX?=/usr/local
+
+# No-op by default; set RANLIB=ranlib if the archive needs a separate
+# indexing pass.
+RANLIB=true
+
+OFILES=\
+	regcomp.$O\
+	regerror.$O\
+	regexec.$O\
+	regsub.$O\
+	regaux.$O\
+	rregexec.$O\
+	rregsub.$O\
+	utf.$O\
+
+HFILES=\
+	regexp9.h\
+	regcomp.h\
+	utf.h\
+
+# test and test2 produce real files of the same name, so they are not phony.
+.PHONY: all install clean
+
+all: $(LIB)
+
+install: $(LIB)
+	mkdir -p $(PREFIX)/share/man/man3 $(PREFIX)/share/man/man7
+	install -m 0644 regexp9.3 $(PREFIX)/share/man/man3/regexp9.3
+	install -m 0644 regexp9.7 $(PREFIX)/share/man/man7/regexp9.7
+	mkdir -p $(PREFIX)/lib
+	install -m 0644 $(LIB) $(PREFIX)/lib/$(LIB)
+	mkdir -p $(PREFIX)/include
+	install -m 0644 regexp9.h $(PREFIX)/include/regexp9.h
+
+test: test.$O $(LIB)
+	$(CC) -o test test.$O $(LIB)
+
+test2: test2.$O $(LIB)
+	$(CC) -o test2 test2.$O $(LIB)
+
+$(LIB): $(OFILES)
+	$(AR) $(ARFLAGS) $(LIB) $(OFILES)
+	$(RANLIB) $(LIB)
+
+%.$O: %.c
+	$(CC) $(CFLAGS) -c -o $@ $<
+
+# Every library object is rebuilt when any of the headers changes.
+$(OFILES): $(HFILES)
+
+clean:
+	rm -f $(OFILES) $(LIB) test test2 test.$O test2.$O
-#include "lib9.h"
+#include <string.h>
#include "regexp9.h"
#include "regcomp.h"
-#include "lib9.h"
+#include <stdio.h>
+#include <setjmp.h>
+#include <string.h>
+#include <stdlib.h>
#include "regexp9.h"
#include "regcomp.h"
Node *stk;
int *ip;
- print("operators\n");
+ printf("operators\n");
for(ip=atorstack; ip<atorp; ip++)
- print("0%o\n", *ip);
- print("operands\n");
+ printf("0%o\n", *ip);
+ printf("operands\n");
for(stk=andstack; stk<andp; stk++)
- print("0%o\t0%o\n", stk->first->type, stk->last->type);
+ printf("0%o\t0%o\n", stk->first->type, stk->last->type);
}
static void
l = pp->firstinst;
do{
- print("%d:\t0%o\t%d\t%d", l-pp->firstinst, l->type,
+ printf("%d:\t0%o\t%d\t%d", l-pp->firstinst, l->type,
l->u2.left-pp->firstinst, l->u1.right-pp->firstinst);
if(l->type == RUNE)
- print("\t%C\n", l->u1.r);
+ printf("\t%C\n", l->u1.r);
else if(l->type == CCLASS || l->type == NCCLASS){
- print("\t[");
+ printf("\t[");
if(l->type == NCCLASS)
- print("^");
+ printf("^");
for(p = l->u1.cp->spans; p < l->u1.cp->end; p += 2)
if(p[0] == p[1])
- print("%C", p[0]);
+ printf("%C", p[0]);
else
- print("%C-%C", p[0], p[1]);
- print("]\n");
+ printf("%C-%C", p[0], p[1]);
+ printf("]\n");
} else
- print("\n");
+ printf("\n");
}while(l++->type);
}
#endif
#endif
pp = optimize(pp);
#ifdef DEBUG
- print("start: %d\n", andp->first-pp->firstinst);
+ printf("start: %d\n", andp->first-pp->firstinst);
dump(pp);
#endif
out:
/*
* substitution list
*/
-#define uchar __reuchar
-typedef unsigned char uchar;
#define nelem(x) (sizeof(x)/sizeof((x)[0]))
#define NSUBEXP 32
-#include "lib9.h"
+#include <string.h>
+#include <stdlib.h>
+#include <unistd.h>
#include "regexp9.h"
void
strcat(buf, s);
strcat(buf, "\n");
write(2, buf, strlen(buf));
- exits("regerr");
+ exit(1);
}
-#include "lib9.h"
+#include <stdlib.h>
#include "regexp9.h"
#include "regcomp.h"
break;
}
}
- r = *(uchar*)s;
+ r = *(unsigned char*)s;
if(r < Runeself)
n = 1;
else
/* mark space */
relist0 = malloc(BIGLISTSIZE*sizeof(Relist));
- if(relist0 == nil)
+ if(relist0 == NULL)
return -1;
relist1 = malloc(BIGLISTSIZE*sizeof(Relist));
- if(relist1 == nil){
+ if(relist1 == NULL){
free(relist1);
return -1;
}
AUTOLIB(regexp9)
#endif
-#include <utf.h>
+#include "utf.h"
typedef struct Resub Resub;
typedef struct Reclass Reclass;
-#include "lib9.h"
#include "regexp9.h"
/* substitute into one string using the matches from the last regexec() */
-#include "lib9.h"
#include "regexp9.h"
#include "regcomp.h"
-#include "lib9.h"
#include "regexp9.h"
/* substitute into one string using the matches from the last regexec() */
-#include "lib9.h"
-#include <regexp9.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+#include "regexp9.h"
struct x
{
tp->p = regcomp(tp->re);
for(tp = t; tp->re; tp++){
- print("%s VIA %s", av[1], tp->re);
+ printf("%s VIA %s", av[1], tp->re);
memset(rs, 0, sizeof rs);
if(regexec(tp->p, av[1], rs, 10)){
regsub(tp->s, dst, sizeof dst, rs, 10);
- print(" sub %s -> %s", tp->s, dst);
+ printf(" sub %s -> %s", tp->s, dst);
}
- print("\n");
+ printf("\n");
}
exit(0);
}
-#include "lib9.h"
-#include <regexp9.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+#include "regexp9.h"
int
main(int ac, char **av)
p = regcomp("[^a-z]");
s = "\n";
if(regexec(p, s, rs, 10))
- print("%s %lux %lux %lux\n", s, s, rs[0].s.sp, rs[0].e.ep);
+ printf("%s %lux %lux %lux\n", s, s, rs[0].s.sp, rs[0].e.ep);
s = "0";
if(regexec(p, s, rs, 10))
- print("%s %lux %lux %lux\n", s, s, rs[0].s.sp, rs[0].e.ep);
+ printf("%s %lux %lux %lux\n", s, s, rs[0].s.sp, rs[0].e.ep);
exit(0);
}
--- /dev/null
+/*
+ * The authors of this software are Rob Pike and Ken Thompson.
+ * Copyright (c) 2002 by Lucent Technologies.
+ * Permission to use, copy, modify, and distribute this software for any
+ * purpose without fee is hereby granted, provided that this entire notice
+ * is included in all copies of any software which is or includes a copy
+ * or modification of this software and in all copies of the supporting
+ * documentation for such software.
+ * THIS SOFTWARE IS BEING PROVIDED "AS IS", WITHOUT ANY EXPRESS OR IMPLIED
+ * WARRANTY. IN PARTICULAR, NEITHER THE AUTHORS NOR LUCENT TECHNOLOGIES MAKE
+ * ANY REPRESENTATION OR WARRANTY OF ANY KIND CONCERNING THE MERCHANTABILITY
+ * OF THIS SOFTWARE OR ITS FITNESS FOR ANY PARTICULAR PURPOSE.
+ */
+#include <string.h>
+#include "utf.h"
+
+/*
+ * Bit-layout constants for the UTF-8 decoder below.
+ *   BitN  - number of payload bits in the lead byte of an N-byte sequence
+ *   Bitx  - number of payload bits in a continuation byte
+ *   TN/Tx - tag bits of the corresponding lead/continuation byte
+ *   RuneN - largest value an N-byte sequence can carry
+ */
+enum
+{
+ Bit1 = 7,
+ Bitx = 6,
+ Bit2 = 5,
+ Bit3 = 4,
+ Bit4 = 3,
+ Bit5 = 2,
+
+ T1 = ((1<<(Bit1+1))-1) ^ 0xFF, /* 0000 0000 */
+ Tx = ((1<<(Bitx+1))-1) ^ 0xFF, /* 1000 0000 */
+ T2 = ((1<<(Bit2+1))-1) ^ 0xFF, /* 1100 0000 */
+ T3 = ((1<<(Bit3+1))-1) ^ 0xFF, /* 1110 0000 */
+ T4 = ((1<<(Bit4+1))-1) ^ 0xFF, /* 1111 0000 */
+ T5 = ((1<<(Bit5+1))-1) ^ 0xFF, /* 1111 1000 */
+
+ Rune1 = (1<<(Bit1+0*Bitx))-1, /* 0000 0000 0000 0000 0111 1111 */
+ Rune2 = (1<<(Bit2+1*Bitx))-1, /* 0000 0000 0000 0111 1111 1111 */
+ Rune3 = (1<<(Bit3+2*Bitx))-1, /* 0000 0000 1111 1111 1111 1111 */
+ Rune4 = (1<<(Bit4+3*Bitx))-1, /* 0001 1111 1111 1111 1111 1111 */
+
+ Maskx = (1<<Bitx)-1, /* 0011 1111 */
+ Testx = Maskx ^ 0xFF, /* 1100 0000 */
+
+ Bad = Runeerror /* value stored in *rune when decoding fails */
+};
+
+/*
+ * chartorune decodes the UTF-8 sequence at the start of str into *rune
+ * and returns the number of bytes it occupies (1 to 4).
+ *
+ * A malformed sequence (stray continuation byte, missing continuation
+ * byte, overlong encoding, UTF-16 surrogate, value above Runemax, or a
+ * lead byte of T5 or higher) stores Bad (Runeerror) in *rune and returns 1,
+ * so a caller that advances by the return value skips a single byte.
+ *
+ * Each trailing byte is examined only after the byte before it has been
+ * found to be non-zero, so decoding never reads past the terminator of
+ * a NUL-terminated string.
+ */
+int
+chartorune(Rune *rune, char *str)
+{
+	/* UTF-16 surrogate halves; RFC 3629 forbids encoding them in UTF-8 */
+	enum { SurrogateMin = 0xD800, SurrogateMax = 0xDFFF };
+	int c, c1, c2, c3;
+	long l;
+
+	/*
+	 * one character sequence
+	 *	00000-0007F => T1
+	 */
+	c = *(unsigned char*)str;
+	if(c < Tx) {
+		*rune = c;
+		return 1;
+	}
+
+	/*
+	 * two character sequence
+	 *	0080-07FF => T2 Tx
+	 */
+	c1 = *(unsigned char*)(str+1) ^ Tx;	/* strips the 10xxxxxx tag */
+	if(c1 & Testx)
+		goto bad;
+	if(c < T3) {
+		if(c < T2)	/* continuation byte in lead position */
+			goto bad;
+		l = ((c << Bitx) | c1) & Rune2;
+		if(l <= Rune1)	/* overlong encoding */
+			goto bad;
+		*rune = l;
+		return 2;
+	}
+
+	/*
+	 * three character sequence
+	 *	0800-FFFF => T3 Tx Tx
+	 */
+	c2 = *(unsigned char*)(str+2) ^ Tx;
+	if(c2 & Testx)
+		goto bad;
+	if(c < T4) {
+		l = ((((c << Bitx) | c1) << Bitx) | c2) & Rune3;
+		if(l <= Rune2)	/* overlong encoding */
+			goto bad;
+		if(SurrogateMin <= l && l <= SurrogateMax)
+			goto bad;
+		*rune = l;
+		return 3;
+	}
+
+	/*
+	 * four character sequence
+	 *	10000-10FFFF => T4 Tx Tx Tx
+	 */
+	if(UTFmax >= 4) {
+		c3 = *(unsigned char*)(str+3) ^ Tx;
+		if(c3 & Testx)
+			goto bad;
+		if(c < T5) {
+			l = ((((((c << Bitx) | c1) << Bitx) | c2) << Bitx) | c3) & Rune4;
+			if(l <= Rune3)	/* overlong encoding */
+				goto bad;
+			if(l > Runemax)
+				goto bad;
+			*rune = l;
+			return 4;
+		}
+	}
+
+	/*
+	 * bad decoding
+	 */
+bad:
+	*rune = Bad;
+	return 1;
+}
+
+/*
+ * runestrchr returns a pointer to the first element equal to c in the
+ * zero-terminated rune string s, or 0 when c does not occur.
+ * Searching for 0 yields a pointer to the terminator itself.
+ */
+Rune*
+runestrchr(Rune *s, Rune c)
+{
+	/* the match test runs first, so c == 0 stops on the terminator */
+	for(; *s != c; s++)
+		if(*s == 0)
+			return 0;
+	return s;
+}
+
+/*
+ * utfrune returns a pointer to the first occurrence of the rune c in
+ * the UTF-8 string s, or 0 when c does not occur.
+ */
+char*
+utfrune(char *s, long c)
+{
+	Rune decoded;
+	long b;
+	int len;
+
+	/* below Runesync the value is a plain byte, so strchr can do the search */
+	if(c < Runesync)
+		return strchr(s, c);
+
+	while((b = *(unsigned char*)s) != 0) {
+		if(b < Runeself) {
+			/* one byte rune */
+			if(b == c)
+				return s;
+			s++;
+		} else {
+			/* multibyte sequence: decode it and step over it as a unit */
+			len = chartorune(&decoded, s);
+			if(decoded == c)
+				return s;
+			s += len;
+		}
+	}
+	return 0;
+}
--- /dev/null
+/*
+ * Declarations for the UTF-8 helpers the regexp library needs.
+ */
+#ifndef _UTF_H_
+#define _UTF_H_ 1
+#if defined(__cplusplus)
+extern "C" {
+#endif
+
+typedef unsigned int Rune; /* 32 bits */
+
+enum
+{
+ UTFmax = 4, /* maximum bytes per rune */
+ Runesync = 0x80, /* cannot represent part of a UTF sequence (<) */
+ Runeself = 0x80, /* rune and UTF sequences are the same (<) */
+ Runeerror = 0xFFFD, /* decoding error in UTF */
+ Runemax = 0x10FFFF /* maximum rune value */
+};
+
+/*
+ * Decode the UTF-8 sequence at str into *rune and return the number of
+ * bytes it occupies. A malformed sequence stores Runeerror and returns 1.
+ */
+int chartorune(Rune *rune, char *str);
+/*
+ * Return a pointer to the first occurrence of c in the zero-terminated
+ * rune string s, or 0 if there is none. Searching for 0 returns a pointer
+ * to the terminator.
+ */
+Rune* runestrchr(Rune *s, Rune c);
+/*
+ * Return a pointer to the first occurrence of the rune c in the UTF-8
+ * string s, or 0 if there is none.
+ */
+char* utfrune(char *s, long c);
+
+#if defined(__cplusplus)
+}
+#endif
+#endif