From: David du Colombier <0intro@gmail.com> Date: Sat, 8 Jun 2013 19:10:00 +0000 (+0200) Subject: remove dependence on libfmt and libutf X-Git-Url: https://git.mdlowis.com/?a=commitdiff_plain;h=7543e957fa4c890640146a1833f958ccaa7d3a5e;p=proto%2Flibregexp.git remove dependence on libfmt and libutf --- diff --git a/Makefile b/Makefile new file mode 100644 index 0000000..b269abe --- /dev/null +++ b/Makefile @@ -0,0 +1,52 @@ +CC=gcc +CFLAGS+=-Wall -Wextra -O3 -c -g +O=o +LIB=libregexp9.a + +RANLIB=true + +LIB=libregexp9.a + +OFILES=\ + regcomp.$O\ + regerror.$O\ + regexec.$O\ + regsub.$O\ + regaux.$O\ + rregexec.$O\ + rregsub.$O\ + utf.$O\ + +HFILES=\ + regexp9.h\ + regcomp.h\ + utf.h\ + +all: $(LIB) + +install: $(LIB) + mkdir -p $(PREFIX)/share/man/man3 $(PREFIX)/man/man7 + install -m 0644 regexp9.3 $(PREFIX)/share/man/man3/regexp9.3 + install -m 0644 regexp9.7 $(PREFIX)/man/man7/regexp9.7 + mkdir -p $(PREFIX)/lib + install -m 0644 $(LIB) $(PREFIX)/lib/$(LIB) + mkdir -p $(PREFIX)/include + install -m 0644 regexp9.h $(PREFIX)/include/regexp9.h + +test: test.$O $(LIB) + $(CC) -o test test.$O $(LIB) + +test2: test2.$O $(LIB) + $(CC) -o test2 test2.$O $(LIB) + +$(LIB): $(OFILES) + $(AR) $(ARFLAGS) $(LIB) $(OFILES) + $(RANLIB) $(LIB) + +%.$O: %.c + $(CC) $(CFLAGS) $*.c + +$(OFILES): $(HFILES) + +clean: + rm -f $(OFILES) $(LIB) diff --git a/regaux.c b/regaux.c index b854b5a..46fb250 100644 --- a/regaux.c +++ b/regaux.c @@ -1,4 +1,4 @@ -#include "lib9.h" +#include #include "regexp9.h" #include "regcomp.h" diff --git a/regcomp.c b/regcomp.c index 0967875..a8c4685 100644 --- a/regcomp.c +++ b/regcomp.c @@ -1,4 +1,7 @@ -#include "lib9.h" +#include +#include +#include +#include #include "regexp9.h" #include "regcomp.h" @@ -288,12 +291,12 @@ dumpstack(void){ Node *stk; int *ip; - print("operators\n"); + printf("operators\n"); for(ip=atorstack; ipfirst->type, stk->last->type); + printf("0%o\t0%o\n", stk->first->type, stk->last->type); } static void @@ -304,22 +307,22 @@ dump(Reprog *pp) l = pp->firstinst; do{ - print("%d:\t0%o\t%d\t%d", l-pp->firstinst, l->type, + printf("%d:\t0%o\t%d\t%d", l-pp->firstinst, l->type, l->u2.left-pp->firstinst, l->u1.right-pp->firstinst); if(l->type == RUNE) - print("\t%C\n", l->u1.r); + printf("\t%C\n", l->u1.r); else if(l->type == CCLASS || l->type == NCCLASS){ - print("\t["); + printf("\t["); if(l->type == NCCLASS) - print("^"); + printf("^"); for(p = l->u1.cp->spans; p < l->u1.cp->end; p += 2) if(p[0] == p[1]) - print("%C", p[0]); + printf("%C", p[0]); else - print("%C-%C", p[0], p[1]); - print("]\n"); + printf("%C-%C", p[0], p[1]); + printf("]\n"); } else - print("\n"); + printf("\n"); }while(l++->type); } #endif @@ -536,7 +539,7 @@ regcomp1(char *s, int literal, int dot_type) #endif pp = optimize(pp); #ifdef DEBUG - print("start: %d\n", andp->first-pp->firstinst); + printf("start: %d\n", andp->first-pp->firstinst); dump(pp); #endif out: diff --git a/regcomp.h b/regcomp.h index 4b9a483..a98e5be 100644 --- a/regcomp.h +++ b/regcomp.h @@ -1,8 +1,6 @@ /* * substitution list */ -#define uchar __reuchar -typedef unsigned char uchar; #define nelem(x) (sizeof(x)/sizeof((x)[0])) #define NSUBEXP 32 diff --git a/regerror.c b/regerror.c index 99ff0c7..b218da1 100644 --- a/regerror.c +++ b/regerror.c @@ -1,4 +1,6 @@ -#include "lib9.h" +#include +#include +#include #include "regexp9.h" void @@ -10,5 +12,5 @@ regerror(char *s) strcat(buf, s); strcat(buf, "\n"); write(2, buf, strlen(buf)); - exits("regerr"); + exit(1); } diff --git a/regexec.c b/regexec.c index a00fbcb..fa6accc 100644 --- a/regexec.c +++ b/regexec.c @@ -1,4 +1,4 @@ -#include "lib9.h" +#include #include "regexp9.h" #include "regcomp.h" @@ -62,7 +62,7 @@ regexec1(Reprog *progp, /* program to run */ break; } } - r = *(uchar*)s; + r = *(unsigned char*)s; if(r < Runeself) n = 1; else @@ -167,10 +167,10 @@ regexec2(Reprog *progp, /* program to run */ /* mark space */ relist0 = malloc(BIGLISTSIZE*sizeof(Relist)); - if(relist0 == nil) + if(relist0 == NULL) return -1; relist1 = malloc(BIGLISTSIZE*sizeof(Relist)); - if(relist1 == nil){ + if(relist1 == NULL){ free(relist1); return -1; } diff --git a/regexp9.h b/regexp9.h index 20c0c3d..e78cb7d 100644 --- a/regexp9.h +++ b/regexp9.h @@ -8,7 +8,7 @@ extern "C" { AUTOLIB(regexp9) #endif -#include +#include "utf.h" typedef struct Resub Resub; typedef struct Reclass Reclass; diff --git a/regsub.c b/regsub.c index 579d124..c9358c3 100644 --- a/regsub.c +++ b/regsub.c @@ -1,4 +1,3 @@ -#include "lib9.h" #include "regexp9.h" /* substitute into one string using the matches from the last regexec() */ diff --git a/rregexec.c b/rregexec.c index 16d95e6..e273cba 100644 --- a/rregexec.c +++ b/rregexec.c @@ -1,4 +1,3 @@ -#include "lib9.h" #include "regexp9.h" #include "regcomp.h" diff --git a/rregsub.c b/rregsub.c index 5a4a564..65e975e 100644 --- a/rregsub.c +++ b/rregsub.c @@ -1,4 +1,3 @@ -#include "lib9.h" #include "regexp9.h" /* substitute into one string using the matches from the last regexec() */ diff --git a/test.c b/test.c index f6bea53..257efed 100644 --- a/test.c +++ b/test.c @@ -1,5 +1,7 @@ -#include "lib9.h" -#include +#include +#include +#include +#include "regexp9.h" struct x { @@ -33,13 +35,13 @@ main(int ac, char **av) tp->p = regcomp(tp->re); for(tp = t; tp->re; tp++){ - print("%s VIA %s", av[1], tp->re); + printf("%s VIA %s", av[1], tp->re); memset(rs, 0, sizeof rs); if(regexec(tp->p, av[1], rs, 10)){ regsub(tp->s, dst, sizeof dst, rs, 10); - print(" sub %s -> %s", tp->s, dst); + printf(" sub %s -> %s", tp->s, dst); } - print("\n"); + printf("\n"); } exit(0); } diff --git a/test2.c b/test2.c index 62d5213..a83451a 100644 --- a/test2.c +++ b/test2.c @@ -1,5 +1,7 @@ -#include "lib9.h" -#include +#include +#include +#include +#include "regexp9.h" int main(int ac, char **av) @@ -11,9 +13,9 @@ main(int ac, char **av) p = regcomp("[^a-z]"); s = "\n"; if(regexec(p, s, rs, 10)) - print("%s %lux %lux %lux\n", s, s, rs[0].s.sp, rs[0].e.ep); + printf("%s %lux %lux %lux\n", s, s, rs[0].s.sp, rs[0].e.ep); s = "0"; if(regexec(p, s, rs, 10)) - print("%s %lux %lux %lux\n", s, s, rs[0].s.sp, rs[0].e.ep); + printf("%s %lux %lux %lux\n", s, s, rs[0].s.sp, rs[0].e.ep); exit(0); } diff --git a/utf.c b/utf.c new file mode 100644 index 0000000..f4f5922 --- /dev/null +++ b/utf.c @@ -0,0 +1,162 @@ +/* + * The authors of this software are Rob Pike and Ken Thompson. + * Copyright (c) 2002 by Lucent Technologies. + * Permission to use, copy, modify, and distribute this software for any + * purpose without fee is hereby granted, provided that this entire notice + * is included in all copies of any software which is or includes a copy + * or modification of this software and in all copies of the supporting + * documentation for such software. + * THIS SOFTWARE IS BEING PROVIDED "AS IS", WITHOUT ANY EXPRESS OR IMPLIED + * WARRANTY. IN PARTICULAR, NEITHER THE AUTHORS NOR LUCENT TECHNOLOGIES MAKE + * ANY REPRESENTATION OR WARRANTY OF ANY KIND CONCERNING THE MERCHANTABILITY + * OF THIS SOFTWARE OR ITS FITNESS FOR ANY PARTICULAR PURPOSE. + */ +#include +#include "utf.h" + +enum +{ + Bit1 = 7, + Bitx = 6, + Bit2 = 5, + Bit3 = 4, + Bit4 = 3, + Bit5 = 2, + + T1 = ((1<<(Bit1+1))-1) ^ 0xFF, /* 0000 0000 */ + Tx = ((1<<(Bitx+1))-1) ^ 0xFF, /* 1000 0000 */ + T2 = ((1<<(Bit2+1))-1) ^ 0xFF, /* 1100 0000 */ + T3 = ((1<<(Bit3+1))-1) ^ 0xFF, /* 1110 0000 */ + T4 = ((1<<(Bit4+1))-1) ^ 0xFF, /* 1111 0000 */ + T5 = ((1<<(Bit5+1))-1) ^ 0xFF, /* 1111 1000 */ + + Rune1 = (1<<(Bit1+0*Bitx))-1, /* 0000 0000 0000 0000 0111 1111 */ + Rune2 = (1<<(Bit2+1*Bitx))-1, /* 0000 0000 0000 0111 1111 1111 */ + Rune3 = (1<<(Bit3+2*Bitx))-1, /* 0000 0000 1111 1111 1111 1111 */ + Rune4 = (1<<(Bit4+3*Bitx))-1, /* 0011 1111 1111 1111 1111 1111 */ + + Maskx = (1< T1 + */ + c = *(unsigned char*)str; + if(c < Tx) { + *rune = c; + return 1; + } + + /* + * two character sequence + * 0080-07FF => T2 Tx + */ + c1 = *(unsigned char*)(str+1) ^ Tx; + if(c1 & Testx) + goto bad; + if(c < T3) { + if(c < T2) + goto bad; + l = ((c << Bitx) | c1) & Rune2; + if(l <= Rune1) + goto bad; + *rune = l; + return 2; + } + + /* + * three character sequence + * 0800-FFFF => T3 Tx Tx + */ + c2 = *(unsigned char*)(str+2) ^ Tx; + if(c2 & Testx) + goto bad; + if(c < T4) { + l = ((((c << Bitx) | c1) << Bitx) | c2) & Rune3; + if(l <= Rune2) + goto bad; + *rune = l; + return 3; + } + + /* + * four character sequence + * 10000-10FFFF => T4 Tx Tx Tx + */ + if(UTFmax >= 4) { + c3 = *(unsigned char*)(str+3) ^ Tx; + if(c3 & Testx) + goto bad; + if(c < T5) { + l = ((((((c << Bitx) | c1) << Bitx) | c2) << Bitx) | c3) & Rune4; + if(l <= Rune3) + goto bad; + if(l > Runemax) + goto bad; + *rune = l; + return 4; + } + } + + /* + * bad decoding + */ +bad: + *rune = Bad; + return 1; +} + +Rune* +runestrchr(Rune *s, Rune c) +{ + Rune c0 = c; + Rune c1; + + if(c == 0) { + while(*s++) + ; + return s-1; + } + + while((c1 = *s++)) + if(c1 == c0) + return s-1; + return 0; +} + +char* +utfrune(char *s, long c) +{ + long c1; + Rune r; + int n; + + if(c < Runesync) /* not part of utf sequence */ + return strchr(s, c); + + for(;;) { + c1 = *(unsigned char*)s; + if(c1 < Runeself) { /* one byte rune */ + if(c1 == 0) + return 0; + if(c1 == c) + return s; + s++; + continue; + } + n = chartorune(&r, s); + if(r == c) + return s; + s += n; + } +} diff --git a/utf.h b/utf.h new file mode 100644 index 0000000..3652154 --- /dev/null +++ b/utf.h @@ -0,0 +1,25 @@ +#ifndef _UTF_H_ +#define _UTF_H_ 1 +#if defined(__cplusplus) +extern "C" { +#endif + +typedef unsigned int Rune; /* 32 bits */ + +enum +{ + UTFmax = 4, /* maximum bytes per rune */ + Runesync = 0x80, /* cannot represent part of a UTF sequence (<) */ + Runeself = 0x80, /* rune and UTF sequences are the same (<) */ + Runeerror = 0xFFFD, /* decoding error in UTF */ + Runemax = 0x10FFFF /* maximum rune value */ +}; + +int chartorune(Rune *rune, char *str); +Rune* runestrchr(Rune *s, Rune c); +char* utfrune(char *s, long c); + +#if defined(__cplusplus) +} +#endif +#endif