From: David du Colombier <0intro@gmail.com>
Date: Sat, 8 Jun 2013 19:10:00 +0000 (+0200)
Subject: remove dependence on libfmt and libutf
X-Git-Url: https://git.mdlowis.com/?a=commitdiff_plain;h=7543e957fa4c890640146a1833f958ccaa7d3a5e;p=proto%2Flibregexp.git

remove dependence on libfmt and libutf
---

diff --git a/Makefile b/Makefile
new file mode 100644
index 0000000..b269abe
--- /dev/null
+++ b/Makefile
@@ -0,0 +1,52 @@
+CC=gcc
+CFLAGS+=-Wall -Wextra -O3 -c -g
+O=o
+LIB=libregexp9.a
+# RANLIB is stubbed with true(1): the ranlib step of the $(LIB) rule does nothing.
+RANLIB=true
+
+.PHONY: all install clean
+
+OFILES=\
+	regcomp.$O\
+	regerror.$O\
+	regexec.$O\
+	regsub.$O\
+	regaux.$O\
+	rregexec.$O\
+	rregsub.$O\
+	utf.$O\
+
+HFILES=\
+	regexp9.h\
+	regcomp.h\
+	utf.h\
+
+all: $(LIB)
+
+install: $(LIB)
+	mkdir -p $(PREFIX)/share/man/man3 $(PREFIX)/share/man/man7
+	install -m 0644 regexp9.3 $(PREFIX)/share/man/man3/regexp9.3
+	install -m 0644 regexp9.7 $(PREFIX)/share/man/man7/regexp9.7
+	mkdir -p $(PREFIX)/lib
+	install -m 0644 $(LIB) $(PREFIX)/lib/$(LIB)
+	mkdir -p $(PREFIX)/include
+	install -m 0644 regexp9.h $(PREFIX)/include/regexp9.h
+
+test: test.$O $(LIB)
+	$(CC) -o test test.$O $(LIB)
+
+test2: test2.$O $(LIB)
+	$(CC) -o test2 test2.$O $(LIB)
+
+$(LIB): $(OFILES)
+	$(AR) $(ARFLAGS) $(LIB) $(OFILES)
+	$(RANLIB) $(LIB)
+# CFLAGS carries -c, so this rule compiles without linking.
+%.$O: %.c
+	$(CC) $(CFLAGS) $*.c
+
+$(OFILES): $(HFILES)
+# Remove the library, its objects and everything the test/test2 targets build.
+clean:
+	rm -f $(OFILES) $(LIB) test test2 test.$O test2.$O
diff --git a/regaux.c b/regaux.c
index b854b5a..46fb250 100644
--- a/regaux.c
+++ b/regaux.c
@@ -1,4 +1,4 @@
-#include "lib9.h"
+#include <string.h>
 #include "regexp9.h"
 #include "regcomp.h"
 
diff --git a/regcomp.c b/regcomp.c
index 0967875..a8c4685 100644
--- a/regcomp.c
+++ b/regcomp.c
@@ -1,4 +1,7 @@
-#include "lib9.h"
+#include <stdio.h>
+#include <setjmp.h>
+#include <string.h>
+#include <stdlib.h>
 #include "regexp9.h"
 #include "regcomp.h"
 
@@ -288,12 +291,12 @@ dumpstack(void){
 	Node *stk;
 	int *ip;
 
-	print("operators\n");
+	printf("operators\n");
 	for(ip=atorstack; ip<atorp; ip++)
-		print("0%o\n", *ip);
-	print("operands\n");
+		printf("0%o\n", *ip);
+	printf("operands\n");
 	for(stk=andstack; stk<andp; stk++)
-		print("0%o\t0%o\n", stk->first->type, stk->last->type);
+		printf("0%o\t0%o\n", stk->first->type, stk->last->type);
 }
 
 static	void
@@ -304,22 +307,22 @@ dump(Reprog *pp)
 
 	l = pp->firstinst;
 	do{
-		print("%d:\t0%o\t%d\t%d", l-pp->firstinst, l->type,
+		printf("%d:\t0%o\t%d\t%d", l-pp->firstinst, l->type,
 			l->u2.left-pp->firstinst, l->u1.right-pp->firstinst);
 		if(l->type == RUNE)
-			print("\t%C\n", l->u1.r);
+			printf("\t%C\n", l->u1.r);
 		else if(l->type == CCLASS || l->type == NCCLASS){
-			print("\t[");
+			printf("\t[");
 			if(l->type == NCCLASS)
-				print("^");
+				printf("^");
 			for(p = l->u1.cp->spans; p < l->u1.cp->end; p += 2)
 				if(p[0] == p[1])
-					print("%C", p[0]);
+					printf("%C", p[0]);
 				else
-					print("%C-%C", p[0], p[1]);
-			print("]\n");
+					printf("%C-%C", p[0], p[1]);
+			printf("]\n");
 		} else
-			print("\n");
+			printf("\n");
 	}while(l++->type);
 }
 #endif
@@ -536,7 +539,7 @@ regcomp1(char *s, int literal, int dot_type)
 #endif
 	pp = optimize(pp);
 #ifdef DEBUG
-	print("start: %d\n", andp->first-pp->firstinst);
+	printf("start: %d\n", andp->first-pp->firstinst);
 	dump(pp);
 #endif
 out:
diff --git a/regcomp.h b/regcomp.h
index 4b9a483..a98e5be 100644
--- a/regcomp.h
+++ b/regcomp.h
@@ -1,8 +1,6 @@
 /*
  *  substitution list
  */
-#define uchar __reuchar
-typedef unsigned char uchar;
 #define nelem(x) (sizeof(x)/sizeof((x)[0]))
 
 #define NSUBEXP 32
diff --git a/regerror.c b/regerror.c
index 99ff0c7..b218da1 100644
--- a/regerror.c
+++ b/regerror.c
@@ -1,4 +1,6 @@
-#include "lib9.h"
+#include <string.h>
+#include <stdlib.h>
+#include <unistd.h>
 #include "regexp9.h"
 
 void
@@ -10,5 +12,5 @@ regerror(char *s)
 	strcat(buf, s);
 	strcat(buf, "\n");
 	write(2, buf, strlen(buf));
-	exits("regerr");
+	exit(1);
 }
diff --git a/regexec.c b/regexec.c
index a00fbcb..fa6accc 100644
--- a/regexec.c
+++ b/regexec.c
@@ -1,4 +1,4 @@
-#include "lib9.h"
+#include <stdlib.h>
 #include "regexp9.h"
 #include "regcomp.h"
 
@@ -62,7 +62,7 @@ regexec1(Reprog *progp,	/* program to run */
 				break;
 			}
 		}
-		r = *(uchar*)s;
+		r = *(unsigned char*)s;
 		if(r < Runeself)
 			n = 1;
 		else
@@ -167,10 +167,10 @@ regexec2(Reprog *progp,	/* program to run */
 
 	/* mark space */
 	relist0 = malloc(BIGLISTSIZE*sizeof(Relist));
-	if(relist0 == nil)
+	if(relist0 == NULL)
 		return -1;
 	relist1 = malloc(BIGLISTSIZE*sizeof(Relist));
-	if(relist1 == nil){
-		free(relist1);
+	if(relist1 == NULL){
+		free(relist0);	/* relist1 is NULL here; release the list that was allocated */
 		return -1;
 	}
diff --git a/regexp9.h b/regexp9.h
index 20c0c3d..e78cb7d 100644
--- a/regexp9.h
+++ b/regexp9.h
@@ -8,7 +8,7 @@ extern "C" {
 AUTOLIB(regexp9)
 #endif
 
-#include <utf.h>
+#include "utf.h"
 
 typedef struct Resub		Resub;
 typedef struct Reclass		Reclass;
diff --git a/regsub.c b/regsub.c
index 579d124..c9358c3 100644
--- a/regsub.c
+++ b/regsub.c
@@ -1,4 +1,3 @@
-#include "lib9.h"
 #include "regexp9.h"
 
 /* substitute into one string using the matches from the last regexec() */
diff --git a/rregexec.c b/rregexec.c
index 16d95e6..e273cba 100644
--- a/rregexec.c
+++ b/rregexec.c
@@ -1,4 +1,3 @@
-#include "lib9.h"
 #include "regexp9.h"
 #include "regcomp.h"
 
diff --git a/rregsub.c b/rregsub.c
index 5a4a564..65e975e 100644
--- a/rregsub.c
+++ b/rregsub.c
@@ -1,4 +1,3 @@
-#include "lib9.h"
 #include "regexp9.h"
 
 /* substitute into one string using the matches from the last regexec() */
diff --git a/test.c b/test.c
index f6bea53..257efed 100644
--- a/test.c
+++ b/test.c
@@ -1,5 +1,7 @@
-#include "lib9.h"
-#include <regexp9.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+#include "regexp9.h"
 
 struct x
 {
@@ -33,13 +35,13 @@ main(int ac, char **av)
 		tp->p = regcomp(tp->re);
 
 	for(tp = t; tp->re; tp++){
-		print("%s VIA %s", av[1], tp->re);
+		printf("%s VIA %s", av[1], tp->re);
 		memset(rs, 0, sizeof rs);
 		if(regexec(tp->p, av[1], rs, 10)){
 			regsub(tp->s, dst, sizeof dst, rs, 10);
-			print(" sub %s -> %s", tp->s, dst);
+			printf(" sub %s -> %s", tp->s, dst);
 		}
-		print("\n");
+		printf("\n");
 	}
 	exit(0);
 }
diff --git a/test2.c b/test2.c
index 62d5213..a83451a 100644
--- a/test2.c
+++ b/test2.c
@@ -1,5 +1,7 @@
-#include "lib9.h"
-#include <regexp9.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+#include "regexp9.h"
 
 int
 main(int ac, char **av)
@@ -11,9 +13,9 @@ main(int ac, char **av)
 	p = regcomp("[^a-z]");
 	s = "\n";
 	if(regexec(p, s, rs, 10))
-		print("%s %lux %lux %lux\n", s, s, rs[0].s.sp, rs[0].e.ep);
+		printf("%s %p %p %p\n", s, (void*)s, (void*)rs[0].s.sp, (void*)rs[0].e.ep);	/* addresses: %p */
 	s = "0";
 	if(regexec(p, s, rs, 10))
-		print("%s %lux %lux %lux\n", s, s, rs[0].s.sp, rs[0].e.ep);
+		printf("%s %p %p %p\n", s, (void*)s, (void*)rs[0].s.sp, (void*)rs[0].e.ep);	/* addresses: %p */
 	exit(0);
 }
diff --git a/utf.c b/utf.c
new file mode 100644
index 0000000..f4f5922
--- /dev/null
+++ b/utf.c
@@ -0,0 +1,162 @@
+/*
+ * The authors of this software are Rob Pike and Ken Thompson.
+ *              Copyright (c) 2002 by Lucent Technologies.
+ * Permission to use, copy, modify, and distribute this software for any
+ * purpose without fee is hereby granted, provided that this entire notice
+ * is included in all copies of any software which is or includes a copy
+ * or modification of this software and in all copies of the supporting
+ * documentation for such software.
+ * THIS SOFTWARE IS BEING PROVIDED "AS IS", WITHOUT ANY EXPRESS OR IMPLIED
+ * WARRANTY.  IN PARTICULAR, NEITHER THE AUTHORS NOR LUCENT TECHNOLOGIES MAKE
+ * ANY REPRESENTATION OR WARRANTY OF ANY KIND CONCERNING THE MERCHANTABILITY
+ * OF THIS SOFTWARE OR ITS FITNESS FOR ANY PARTICULAR PURPOSE.
+ */
+#include <string.h>
+#include "utf.h"
+
+enum	/* bit-layout constants for the UTF-8 decoder in this file */
+{
+	Bit1	= 7,	/* payload bits of a one-byte sequence */
+	Bitx	= 6,	/* payload bits of a continuation byte */
+	Bit2	= 5,	/* payload bits of the lead byte of a two-byte sequence */
+	Bit3	= 4,	/* payload bits of the lead byte of a three-byte sequence */
+	Bit4	= 3,	/* payload bits of the lead byte of a four-byte sequence */
+	Bit5	= 2,	/* only used to derive T5 */
+
+	T1	= ((1<<(Bit1+1))-1) ^ 0xFF,	/* 0000 0000 */
+	Tx	= ((1<<(Bitx+1))-1) ^ 0xFF,	/* 1000 0000 */
+	T2	= ((1<<(Bit2+1))-1) ^ 0xFF,	/* 1100 0000 */
+	T3	= ((1<<(Bit3+1))-1) ^ 0xFF,	/* 1110 0000 */
+	T4	= ((1<<(Bit4+1))-1) ^ 0xFF,	/* 1111 0000 */
+	T5	= ((1<<(Bit5+1))-1) ^ 0xFF,	/* 1111 1000 */
+
+	Rune1	= (1<<(Bit1+0*Bitx))-1,		/* 0000 0000 0000 0000 0111 1111 */
+	Rune2	= (1<<(Bit2+1*Bitx))-1,		/* 0000 0000 0000 0111 1111 1111 */
+	Rune3	= (1<<(Bit3+2*Bitx))-1,		/* 0000 0000 1111 1111 1111 1111 */
+	Rune4	= (1<<(Bit4+3*Bitx))-1,		/* 0001 1111 1111 1111 1111 1111 */
+
+	Maskx	= (1<<Bitx)-1,			/* 0011 1111 */
+	Testx	= Maskx ^ 0xFF,			/* 1100 0000 */
+
+	Bad	= Runeerror	/* stored by chartorune for a malformed sequence */
+};
+
+int
+chartorune(Rune *rune, char *str)
+{
+	int c, c1, c2, c3;
+	long l;
+
+	/*
+	 * Decode the UTF-8 sequence at str into *rune; return the bytes consumed.
+	 * one character sequence: 00000-0007F => T1
+	 */
+	c = *(unsigned char*)str;
+	if(c < Tx) {
+		*rune = c;
+		return 1;
+	}
+
+	/*
+	 * two character sequence: 0080-07FF => T2 Tx
+	 * XOR with Tx strips a continuation tag; any Testx bit left means a bad byte
+	 */
+	c1 = *(unsigned char*)(str+1) ^ Tx;
+	if(c1 & Testx)
+		goto bad;
+	if(c < T3) {
+		if(c < T2)		/* lead byte is itself a continuation byte */
+			goto bad;
+		l = ((c << Bitx) | c1) & Rune2;
+		if(l <= Rune1)		/* overlong: value fits a one-byte sequence */
+			goto bad;
+		*rune = l;
+		return 2;
+	}
+
+	/*
+	 * three character sequence: 0800-FFFF => T3 Tx Tx
+	 * a NUL byte fails the continuation test, so the terminator is never passed
+	 */
+	c2 = *(unsigned char*)(str+2) ^ Tx;
+	if(c2 & Testx)
+		goto bad;
+	if(c < T4) {
+		l = ((((c << Bitx) | c1) << Bitx) | c2) & Rune3;
+		if(l <= Rune2)		/* overlong: value fits a two-byte sequence */
+			goto bad;
+		*rune = l;
+		return 3;
+	}
+
+	/*
+	 * four character sequence: 10000-10FFFF => T4 Tx Tx Tx
+	 * lead bytes of T5 and above are not decoded and end up at bad
+	 */
+	if(UTFmax >= 4) {
+		c3 = *(unsigned char*)(str+3) ^ Tx;
+		if(c3 & Testx)
+			goto bad;
+		if(c < T5) {
+			l = ((((((c << Bitx) | c1) << Bitx) | c2) << Bitx) | c3) & Rune4;
+			if(l <= Rune3)	/* overlong: value fits a three-byte sequence */
+				goto bad;
+			if(l > Runemax)	/* above the largest rune value */
+				goto bad;
+			*rune = l;
+			return 4;
+		}
+	}
+
+	/*
+	 * bad decoding: store Bad (Runeerror) and report one byte consumed
+	 */
+bad:
+	*rune = Bad;
+	return 1;
+}
+
+/* Return the first position of c in the 0-terminated rune string s, or 0. */
+Rune*
+runestrchr(Rune *s, Rune c)
+{
+	Rune *p;
+
+	/*
+	 * Test for c before testing for the terminator, so that a search
+	 * for 0 yields a pointer to the terminator itself.
+	 */
+	for(p = s; ; p++){
+		if(*p == c)
+			return p;
+		if(*p == 0)
+			return 0;
+	}
+}
+
+/*
+ * Locate the first rune equal to c in the UTF-8 string s; the result points
+ * at the start of its encoding.  Values below Runesync are left to strchr.
+ */
+char*
+utfrune(char *s, long c)
+{
+	Rune decoded;
+	long lead;
+	int len;
+
+	if(c < Runesync)		/* not part of utf sequence */
+		return strchr(s, c);
+
+	/* walk s one rune at a time; reaching the terminating NUL means no match */
+	while((lead = *(unsigned char*)s) != 0){
+		len = 1;
+		decoded = lead;
+		if(lead >= Runeself)	/* not a one byte rune: decode it */
+			len = chartorune(&decoded, s);
+		if(decoded == c)
+			return s;
+		s += len;
+	}
+	return 0;
+}
diff --git a/utf.h b/utf.h
new file mode 100644
index 0000000..3652154
--- /dev/null
+++ b/utf.h
@@ -0,0 +1,25 @@
+#ifndef _UTF_H_
+#define _UTF_H_ 1
+#if defined(__cplusplus)
+extern "C" { 
+#endif
+/* Minimal UTF-8 support for the regexp library: the Rune type, its limits and three helpers. */
+typedef unsigned int Rune;	/* 32 bits */
+
+enum
+{
+	UTFmax		= 4,		/* maximum bytes per rune */
+	Runesync	= 0x80,		/* cannot represent part of a UTF sequence (<) */
+	Runeself	= 0x80,		/* rune and UTF sequences are the same (<) */
+	Runeerror	= 0xFFFD,	/* decoding error in UTF */
+	Runemax		= 0x10FFFF	/* maximum rune value */
+};
+
+int	chartorune(Rune *rune, char *str);	/* decode one rune from str; returns bytes consumed */
+Rune*	runestrchr(Rune *s, Rune c);	/* first c in the 0-terminated rune string s, or 0 */
+char*	utfrune(char *s, long c);	/* first rune c in the UTF-8 string s, or 0 */
+
+#if defined(__cplusplus)
+}
+#endif
+#endif	/* _UTF_H_ */