git.mdlowis.com Git - proto/libregexp.git/commitdiff
remove dependence on libfmt and libutf
author David du Colombier <0intro@gmail.com>
Sat, 8 Jun 2013 19:10:00 +0000 (21:10 +0200)
committer David du Colombier <0intro@gmail.com>
Sat, 8 Jun 2013 19:10:00 +0000 (21:10 +0200)
14 files changed:
Makefile [new file with mode: 0644]
regaux.c
regcomp.c
regcomp.h
regerror.c
regexec.c
regexp9.h
regsub.c
rregexec.c
rregsub.c
test.c
test2.c
utf.c [new file with mode: 0644]
utf.h [new file with mode: 0644]

diff --git a/Makefile b/Makefile
new file mode 100644 (file)
index 0000000..b269abe
--- /dev/null
+++ b/Makefile
@@ -0,0 +1,52 @@
+CC=gcc
+CFLAGS+=-Wall -Wextra -O3 -c -g
+O=o
+LIB=libregexp9.a
+# RANLIB is the no-op command "true", so the $(RANLIB) step of the archive rule does nothing.
+RANLIB=true
+# Second, identical assignment of LIB; it changes nothing relative to the first one.
+LIB=libregexp9.a
+
+OFILES=\
+       regcomp.$O\
+       regerror.$O\
+       regexec.$O\
+       regsub.$O\
+       regaux.$O\
+       rregexec.$O\
+       rregsub.$O\
+       utf.$O\
+
+HFILES=\
+       regexp9.h\
+       regcomp.h\
+       utf.h\
+
+all: $(LIB)    # first target in the file, so also the default goal
+# PREFIX is never assigned in this Makefile; unless the caller supplies it, it expands to nothing.
+install: $(LIB)
+       mkdir -p $(PREFIX)/share/man/man3 $(PREFIX)/man/man7
+       install -m 0644 regexp9.3 $(PREFIX)/share/man/man3/regexp9.3
+       install -m 0644 regexp9.7 $(PREFIX)/man/man7/regexp9.7
+       mkdir -p $(PREFIX)/lib
+       install -m 0644 $(LIB) $(PREFIX)/lib/$(LIB)
+       mkdir -p $(PREFIX)/include
+       install -m 0644 regexp9.h $(PREFIX)/include/regexp9.h
+# test and test2 link the corresponding driver object against the static library.
+test: test.$O $(LIB)
+       $(CC) -o test test.$O $(LIB)
+
+test2: test2.$O $(LIB)
+       $(CC) -o test2 test2.$O $(LIB)
+# AR and ARFLAGS are not set in this file. NOTE(review): presumably make's built-in defaults are relied on; confirm.
+$(LIB): $(OFILES)
+       $(AR) $(ARFLAGS) $(LIB) $(OFILES)
+       $(RANLIB) $(LIB)
+# CFLAGS contains -c, so this rule compiles to an object file without linking.
+%.$O: %.c
+       $(CC) $(CFLAGS) $*.c
+# Every object file depends on every header listed in HFILES.
+$(OFILES): $(HFILES)
+# clean removes the objects and the archive; the test and test2 executables are not removed.
+clean:
+       rm -f $(OFILES) $(LIB)
index b854b5ac072274c9ad44ba8c4fc669fab65468fc..46fb25070301fb180570bb048d3f8c71af7c2ac7 100644 (file)
--- a/regaux.c
+++ b/regaux.c
@@ -1,4 +1,4 @@
-#include "lib9.h"
+#include <string.h>
 #include "regexp9.h"
 #include "regcomp.h"
 
index 09678755e411a05732348468e03af0aa83313c69..a8c46856e85e302ed91329929a384a3c4293bb0e 100644 (file)
--- a/regcomp.c
+++ b/regcomp.c
@@ -1,4 +1,7 @@
-#include "lib9.h"
+#include <stdio.h>
+#include <setjmp.h>
+#include <string.h>
+#include <stdlib.h>
 #include "regexp9.h"
 #include "regcomp.h"
 
@@ -288,12 +291,12 @@ dumpstack(void){
        Node *stk;
        int *ip;
 
-       print("operators\n");
+       printf("operators\n");
        for(ip=atorstack; ip<atorp; ip++)
-               print("0%o\n", *ip);
-       print("operands\n");
+               printf("0%o\n", *ip);
+       printf("operands\n");
        for(stk=andstack; stk<andp; stk++)
-               print("0%o\t0%o\n", stk->first->type, stk->last->type);
+               printf("0%o\t0%o\n", stk->first->type, stk->last->type);
 }
 
 static void
@@ -304,22 +307,22 @@ dump(Reprog *pp)
 
        l = pp->firstinst;
        do{
-               print("%d:\t0%o\t%d\t%d", l-pp->firstinst, l->type,
+               printf("%d:\t0%o\t%d\t%d", l-pp->firstinst, l->type,
                        l->u2.left-pp->firstinst, l->u1.right-pp->firstinst);
                if(l->type == RUNE)
-                       print("\t%C\n", l->u1.r);
+                       printf("\t%C\n", l->u1.r);
                else if(l->type == CCLASS || l->type == NCCLASS){
-                       print("\t[");
+                       printf("\t[");
                        if(l->type == NCCLASS)
-                               print("^");
+                               printf("^");
                        for(p = l->u1.cp->spans; p < l->u1.cp->end; p += 2)
                                if(p[0] == p[1])
-                                       print("%C", p[0]);
+                                       printf("%C", p[0]);
                                else
-                                       print("%C-%C", p[0], p[1]);
-                       print("]\n");
+                                       printf("%C-%C", p[0], p[1]);
+                       printf("]\n");
                } else
-                       print("\n");
+                       printf("\n");
        }while(l++->type);
 }
 #endif
@@ -536,7 +539,7 @@ regcomp1(char *s, int literal, int dot_type)
 #endif
        pp = optimize(pp);
 #ifdef DEBUG
-       print("start: %d\n", andp->first-pp->firstinst);
+       printf("start: %d\n", andp->first-pp->firstinst);
        dump(pp);
 #endif
 out:
index 4b9a483b90d6ee2ea229a970bf77e4f9afb62906..a98e5bef42a08497621893856f1d1aaf081128bc 100644 (file)
--- a/regcomp.h
+++ b/regcomp.h
@@ -1,8 +1,6 @@
 /*
  *  substitution list
  */
-#define uchar __reuchar
-typedef unsigned char uchar;
 #define nelem(x) (sizeof(x)/sizeof((x)[0]))
 
 #define NSUBEXP 32
index 99ff0c74b855898fd120e1f191106fe16c441ae5..b218da156fac296f44b0fb375e433a22fcd2f0f7 100644 (file)
--- a/regerror.c
+++ b/regerror.c
@@ -1,4 +1,6 @@
-#include "lib9.h"
+#include <string.h>
+#include <stdlib.h>
+#include <unistd.h>
 #include "regexp9.h"
 
 void
@@ -10,5 +12,5 @@ regerror(char *s)
        strcat(buf, s);
        strcat(buf, "\n");
        write(2, buf, strlen(buf));
-       exits("regerr");
+       exit(1);
 }
index a00fbcbc9b8cbee2ce787863edcce9c14ebea349..fa6accc000483cdeb1ed5b533108b2c38e7e1fa8 100644 (file)
--- a/regexec.c
+++ b/regexec.c
@@ -1,4 +1,4 @@
-#include "lib9.h"
+#include <stdlib.h>
 #include "regexp9.h"
 #include "regcomp.h"
 
@@ -62,7 +62,7 @@ regexec1(Reprog *progp,       /* program to run */
                                break;
                        }
                }
-               r = *(uchar*)s;
+               r = *(unsigned char*)s;
                if(r < Runeself)
                        n = 1;
                else
@@ -167,10 +167,10 @@ regexec2(Reprog *progp,   /* program to run */
 
        /* mark space */
        relist0 = malloc(BIGLISTSIZE*sizeof(Relist));
-       if(relist0 == nil)
+       if(relist0 == NULL)
                return -1;
        relist1 = malloc(BIGLISTSIZE*sizeof(Relist));
-       if(relist1 == nil){
+       if(relist1 == NULL){
                free(relist1);
                return -1;
        }
index 20c0c3daa3959a5023050270478526d8e7132aab..e78cb7db553720d7afb2c46644856c21693b74bf 100644 (file)
--- a/regexp9.h
+++ b/regexp9.h
@@ -8,7 +8,7 @@ extern "C" {
 AUTOLIB(regexp9)
 #endif
 
-#include <utf.h>
+#include "utf.h"
 
 typedef struct Resub           Resub;
 typedef struct Reclass         Reclass;
index 579d12432739447c083038a0dda1b18490b39ca0..c9358c37f2e4f764edb33800358ebb705e3ab308 100644 (file)
--- a/regsub.c
+++ b/regsub.c
@@ -1,4 +1,3 @@
-#include "lib9.h"
 #include "regexp9.h"
 
 /* substitute into one string using the matches from the last regexec() */
index 16d95e674792b8c72846db2f483b078aa427f386..e273cba989ccc504d8d046f88a53b436fb7b8ecb 100644 (file)
--- a/rregexec.c
+++ b/rregexec.c
@@ -1,4 +1,3 @@
-#include "lib9.h"
 #include "regexp9.h"
 #include "regcomp.h"
 
index 5a4a564d87a41559123a05d09e297727b50168b1..65e975ea8b8b915132949a912861e2b05df33d65 100644 (file)
--- a/rregsub.c
+++ b/rregsub.c
@@ -1,4 +1,3 @@
-#include "lib9.h"
 #include "regexp9.h"
 
 /* substitute into one string using the matches from the last regexec() */
diff --git a/test.c b/test.c
index f6bea534064ed0ab7c44dab7eb12928fa766dbc3..257efed7086b03f9dbc40fec117f17ee3a3e5f7e 100644 (file)
--- a/test.c
+++ b/test.c
@@ -1,5 +1,7 @@
-#include "lib9.h"
-#include <regexp9.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+#include "regexp9.h"
 
 struct x
 {
@@ -33,13 +35,13 @@ main(int ac, char **av)
                tp->p = regcomp(tp->re);
 
        for(tp = t; tp->re; tp++){
-               print("%s VIA %s", av[1], tp->re);
+               printf("%s VIA %s", av[1], tp->re);
                memset(rs, 0, sizeof rs);
                if(regexec(tp->p, av[1], rs, 10)){
                        regsub(tp->s, dst, sizeof dst, rs, 10);
-                       print(" sub %s -> %s", tp->s, dst);
+                       printf(" sub %s -> %s", tp->s, dst);
                }
-               print("\n");
+               printf("\n");
        }
        exit(0);
 }
diff --git a/test2.c b/test2.c
index 62d5213aec81b35cb0a69ae59e3a00ecf027979d..a83451aa4d14832b6b430e3cd57cb97549f18b74 100644 (file)
--- a/test2.c
+++ b/test2.c
@@ -1,5 +1,7 @@
-#include "lib9.h"
-#include <regexp9.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+#include "regexp9.h"
 
 int
 main(int ac, char **av)
@@ -11,9 +13,9 @@ main(int ac, char **av)
        p = regcomp("[^a-z]");
        s = "\n";
        if(regexec(p, s, rs, 10))
-               print("%s %lux %lux %lux\n", s, s, rs[0].s.sp, rs[0].e.ep);
+               printf("%s %lux %lux %lux\n", s, s, rs[0].s.sp, rs[0].e.ep);
        s = "0";
        if(regexec(p, s, rs, 10))
-               print("%s %lux %lux %lux\n", s, s, rs[0].s.sp, rs[0].e.ep);
+               printf("%s %lux %lux %lux\n", s, s, rs[0].s.sp, rs[0].e.ep);
        exit(0);
 }
diff --git a/utf.c b/utf.c
new file mode 100644 (file)
index 0000000..f4f5922
--- /dev/null
+++ b/utf.c
@@ -0,0 +1,162 @@
+/*
+ * The authors of this software are Rob Pike and Ken Thompson.
+ *              Copyright (c) 2002 by Lucent Technologies.
+ * Permission to use, copy, modify, and distribute this software for any
+ * purpose without fee is hereby granted, provided that this entire notice
+ * is included in all copies of any software which is or includes a copy
+ * or modification of this software and in all copies of the supporting
+ * documentation for such software.
+ * THIS SOFTWARE IS BEING PROVIDED "AS IS", WITHOUT ANY EXPRESS OR IMPLIED
+ * WARRANTY.  IN PARTICULAR, NEITHER THE AUTHORS NOR LUCENT TECHNOLOGIES MAKE
+ * ANY REPRESENTATION OR WARRANTY OF ANY KIND CONCERNING THE MERCHANTABILITY
+ * OF THIS SOFTWARE OR ITS FITNESS FOR ANY PARTICULAR PURPOSE.
+ */
+#include <string.h>
+#include "utf.h"
+
+enum
+{
+       Bit1    = 7,    /* payload bits in a one-byte sequence */
+       Bitx    = 6,    /* payload bits in each continuation byte */
+       Bit2    = 5,    /* payload bits in the lead byte of a two-byte sequence */
+       Bit3    = 4,    /* payload bits in the lead byte of a three-byte sequence */
+       Bit4    = 3,    /* payload bits in the lead byte of a four-byte sequence */
+       Bit5    = 2,    /* used only to derive T5 below */
+
+       T1      = ((1<<(Bit1+1))-1) ^ 0xFF,     /* 0000 0000 */
+       Tx      = ((1<<(Bitx+1))-1) ^ 0xFF,     /* 1000 0000 */
+       T2      = ((1<<(Bit2+1))-1) ^ 0xFF,     /* 1100 0000 */
+       T3      = ((1<<(Bit3+1))-1) ^ 0xFF,     /* 1110 0000 */
+       T4      = ((1<<(Bit4+1))-1) ^ 0xFF,     /* 1111 0000 */
+       T5      = ((1<<(Bit5+1))-1) ^ 0xFF,     /* 1111 1000 */
+
+       Rune1   = (1<<(Bit1+0*Bitx))-1,         /* 0000 0000 0000 0000 0111 1111 */
+       Rune2   = (1<<(Bit2+1*Bitx))-1,         /* 0000 0000 0000 0111 1111 1111 */
+       Rune3   = (1<<(Bit3+2*Bitx))-1,         /* 0000 0000 1111 1111 1111 1111 */
+       Rune4   = (1<<(Bit4+3*Bitx))-1,         /* 0001 1111 1111 1111 1111 1111 */
+
+       Maskx   = (1<<Bitx)-1,                  /* 0011 1111 */
+       Testx   = Maskx ^ 0xFF,                 /* 1100 0000 */
+
+       Bad     = Runeerror     /* value stored by chartorune for a malformed sequence */
+};
+
+int
+chartorune(Rune *rune, char *str)
+{
+       int c, c1, c2, c3;
+       long l;
+       /* Decode the UTF-8 sequence starting at str into *rune; the return value is the number of bytes consumed. */
+       /*
+        * one-byte sequence: a lead byte below Tx is stored unchanged
+        *      00000-0007F => T1
+        */
+       c = *(unsigned char*)str;
+       if(c < Tx) {
+               *rune = c;
+               return 1;
+       }
+
+       /*
+        * two character sequence
+        *      0080-07FF => T2 Tx
+        */
+       c1 = *(unsigned char*)(str+1) ^ Tx;     /* XOR clears the 10xxxxxx tag of a continuation byte */
+       if(c1 & Testx)                          /* bits 6-7 nonzero after the XOR: not a continuation byte */
+               goto bad;
+       if(c < T3) {
+               if(c < T2)                      /* lead byte is in the continuation-byte range Tx..T2-1 */
+                       goto bad;
+               l = ((c << Bitx) | c1) & Rune2;
+               if(l <= Rune1)                  /* overlong: the value fits a one-byte sequence */
+                       goto bad;
+               *rune = l;
+               return 2;
+       }
+
+       /*
+        * three character sequence
+        *      0800-FFFF => T3 Tx Tx
+        */
+       c2 = *(unsigned char*)(str+2) ^ Tx;
+       if(c2 & Testx)
+               goto bad;
+       if(c < T4) {
+               l = ((((c << Bitx) | c1) << Bitx) | c2) & Rune3;
+               if(l <= Rune2)                  /* overlong: the value fits a two-byte sequence */
+                       goto bad;
+               *rune = l;
+               return 3;
+       }
+
+       /*
+        * four character sequence
+        *      10000-10FFFF => T4 Tx Tx Tx
+        */
+       if(UTFmax >= 4) {
+               c3 = *(unsigned char*)(str+3) ^ Tx;
+               if(c3 & Testx)
+                       goto bad;
+               if(c < T5) {                    /* lead bytes of T5 and above fall through to bad */
+                       l = ((((((c << Bitx) | c1) << Bitx) | c2) << Bitx) | c3) & Rune4;
+                       if(l <= Rune3)          /* overlong: the value fits a three-byte sequence */
+                               goto bad;
+                       if(l > Runemax)         /* larger than the maximum rune value */
+                               goto bad;
+                       *rune = l;
+                       return 4;
+               }
+       }
+
+       /*
+        * bad decoding: store Bad and report one byte consumed
+        */
+bad:
+       *rune = Bad;
+       return 1;
+}
+
+Rune*
+runestrchr(Rune *s, Rune c)
+{
+       Rune c0 = c;
+       Rune c1;
+       /* Return a pointer to the first rune equal to c in the zero-terminated rune string s, or 0 if there is none. */
+       if(c == 0) {            /* searching for the terminator itself */
+               while(*s++)
+                       ;
+               return s-1;     /* s was advanced past the terminator; step back onto it */
+       }
+
+       while((c1 = *s++))
+               if(c1 == c0)
+                       return s-1;     /* s already points one past the match */
+       return 0;               /* reached the terminator without a match */
+}
+
+char*
+utfrune(char *s, long c)
+{
+       long c1;
+       Rune r;
+       int n;
+       /* Return a pointer to the first occurrence of rune c in the UTF-8 string s, or 0 if it does not occur. */
+       if(c < Runesync)                /* c cannot be part of a multibyte sequence, so a plain byte search is enough */
+               return strchr(s, c);
+
+       for(;;) {
+               c1 = *(unsigned char*)s;
+               if(c1 < Runeself) {     /* one byte rune */
+                       if(c1 == 0)     /* end of string: c was not found */
+                               return 0;
+                       if(c1 == c)
+                               return s;
+                       s++;
+                       continue;
+               }
+               n = chartorune(&r, s);  /* decode the sequence at s; n is its length in bytes */
+               if(r == c)
+                       return s;
+               s += n;                 /* step over the sequence just decoded */
+       }
+}
diff --git a/utf.h b/utf.h
new file mode 100644 (file)
index 0000000..3652154
--- /dev/null
+++ b/utf.h
@@ -0,0 +1,25 @@
+#ifndef _UTF_H_
+#define _UTF_H_ 1
+#if defined(__cplusplus)
+extern "C" { 
+#endif
+
+typedef unsigned int Rune;     /* 32 bits */
+
+enum
+{
+       UTFmax          = 4,            /* maximum bytes per rune */
+       Runesync        = 0x80,         /* cannot represent part of a UTF sequence (<) */
+       Runeself        = 0x80,         /* rune and UTF sequences are the same (<) */
+       Runeerror       = 0xFFFD,       /* decoding error in UTF */
+       Runemax         = 0x10FFFF      /* maximum rune value */
+};
+
+int    chartorune(Rune *rune, char *str);      /* decode one rune from str into *rune; returns bytes consumed */
+Rune*  runestrchr(Rune *s, Rune c);            /* first occurrence of c in rune string s, or 0 */
+char*  utfrune(char *s, long c);               /* first occurrence of rune c in UTF-8 string s, or 0 */
+
+#if defined(__cplusplus)
+}
+#endif
+#endif