From 89d00419c3f3bf1c9e72817cfd9591d256aedf92 Mon Sep 17 00:00:00 2001 From: David du Colombier <0intro@gmail.com> Date: Sat, 8 Jun 2013 21:40:00 +0200 Subject: [PATCH] import manuals from plan9port --- Makefile | 2 +- regexp9.3 | 212 ++++++++++++++++++++++++++++++++++++++++++++++++++++++ regexp9.7 | 133 ++++++++++++++++++++++++++++++++++ 3 files changed, 346 insertions(+), 1 deletion(-) create mode 100644 regexp9.3 create mode 100644 regexp9.7 diff --git a/Makefile b/Makefile index b269abe..0591ec2 100644 --- a/Makefile +++ b/Makefile @@ -27,7 +27,7 @@ all: $(LIB) install: $(LIB) mkdir -p $(PREFIX)/share/man/man3 $(PREFIX)/man/man7 install -m 0644 regexp9.3 $(PREFIX)/share/man/man3/regexp9.3 - install -m 0644 regexp9.7 $(PREFIX)/man/man7/regexp9.7 + install -m 0644 regexp9.7 $(PREFIX)/share/man/man7/regexp9.7 mkdir -p $(PREFIX)/lib install -m 0644 $(LIB) $(PREFIX)/lib/$(LIB) mkdir -p $(PREFIX)/include diff --git a/regexp9.3 b/regexp9.3 new file mode 100644 index 0000000..069e1d2 --- /dev/null +++ b/regexp9.3 @@ -0,0 +1,212 @@ +.TH REGEXP 3 +.SH NAME +regcomp, regcomplit, regcompnl, regexec, regsub, rregexec, rregsub, regerror \- regular expression +.SH SYNOPSIS +.B #include +.br +.B #include +.br +.B #include +.PP +.ta \w'\fLRegprog 'u +.B +Reprog *regcomp(char *exp) +.PP +.B +Reprog *regcomplit(char *exp) +.PP +.B +Reprog *regcompnl(char *exp) +.PP +.nf +.B +int regexec(Reprog *prog, char *string, Resub *match, int msize) +.PP +.nf +.B +void regsub(char *source, char *dest, int dlen, Resub *match, int msize) +.PP +.nf +.B +int rregexec(Reprog *prog, Rune *string, Resub *match, int msize) +.PP +.nf +.B +void rregsub(Rune *source, Rune *dest, int dlen, Resub *match, int msize) +.PP +.B +void regerror(char *msg) +.SH DESCRIPTION +.I Regcomp +compiles a +regular expression and returns +a pointer to the generated description. +The space is allocated by +.IR malloc (3) +and may be released by +.IR free . +Regular expressions are exactly as in +.IR regexp (7). +.PP +.I Regcomplit +is like +.I regcomp +except that all characters are treated literally. +.I Regcompnl +is like +.I regcomp +except that the +.B . +metacharacter matches all characters, including newlines. +.PP +.I Regexec +matches a null-terminated +.I string +against the compiled regular expression in +.IR prog . +If it matches, +.I regexec +returns +.B 1 +and fills in the array +.I match +with character pointers to the substrings of +.I string +that correspond to the +parenthesized subexpressions of +.IR exp : +.BI match[ i ].sp +points to the beginning and +.BI match[ i ].ep +points just beyond +the end of the +.IR i th +substring. +(Subexpression +.I i +begins at the +.IR i th +left parenthesis, counting from 1.) +Pointers in +.B match[0] +pick out the substring that corresponds to +the whole regular expression. +Unused elements of +.I match +are filled with zeros. +Matches involving +.LR * , +.LR + , +and +.L ? +are extended as far as possible. +The number of array elements in +.I match +is given by +.IR msize . +The structure of elements of +.I match +is: +.IP +.EX +typedef struct { + union { + char *sp; + Rune *rsp; + } s; + union { + char *ep; + Rune *rep; + } e; +} Resub; +.EE +.LP +If +.B match[0].s.sp +is nonzero on entry, +.I regexec +starts matching at that point within +.IR string . +If +.B match[0].e.ep +is nonzero on entry, +the last character matched is the one +preceding that point. +.PP +.I Regsub +places in +.I dest +a substitution instance of +.I source +in the context of the last +.I regexec +performed using +.IR match . +Each instance of +.BI \e n\f1, +where +.I n +is a digit, is replaced by the +string delimited by +.BI match[ n ].sp +and +.BI match[ n ].ep\f1. +Each instance of +.L & +is replaced by the string delimited by +.B match[0].sp +and +.BR match[0].ep . +The substitution will always be null terminated and +trimmed to fit into dlen bytes. +.PP +.IR Regerror , +called whenever an error is detected in +.IR regcomp , +writes the string +.I msg +on the standard error file and exits. +.I Regerror +can be replaced to perform +special error processing. +If the user supplied +.I regerror +returns rather than exits, +.I regcomp +will return 0. +.PP +.I Rregexec +and +.I rregsub +are variants of +.I regexec +and +.I regsub +that use strings of +.B Runes +instead of strings of +.BR chars . +With these routines, the +.I rsp +and +.I rep +fields of the +.I match +array elements should be used. +.SH SOURCE +.B \*9/src/libregexp +.SH "SEE ALSO" +.IR grep (1) +.SH DIAGNOSTICS +.I Regcomp +returns +.B 0 +for an illegal expression +or other failure. +.I Regexec +returns 0 +if +.I string +is not matched. +.SH BUGS +There is no way to specify or match a NUL character; NULs terminate patterns and strings. diff --git a/regexp9.7 b/regexp9.7 new file mode 100644 index 0000000..91e73ad --- /dev/null +++ b/regexp9.7 @@ -0,0 +1,133 @@ +.TH REGEXP 7 +.SH NAME +regexp \- Plan 9 regular expression notation +.SH DESCRIPTION +This manual page describes the regular expression +syntax used by the Plan 9 regular expression library +.IR regexp (3). +It is the form used by +.IR egrep (1) +before +.I egrep +got complicated. +.PP +A +.I "regular expression" +specifies +a set of strings of characters. +A member of this set of strings is said to be +.I matched +by the regular expression. In many applications +a delimiter character, commonly +.LR / , +bounds a regular expression. +In the following specification for regular expressions +the word `character' means any character (rune) but newline. +.PP +The syntax for a regular expression +.B e0 +is +.IP +.EX +e3: literal | charclass | '.' | '^' | '$' | '(' e0 ')' + +e2: e3 + | e2 REP + +REP: '*' | '+' | '?' + +e1: e2 + | e1 e2 + +e0: e1 + | e0 '|' e1 +.EE +.PP +A +.B literal +is any non-metacharacter, or a metacharacter +(one of +.BR .*+?[]()|\e^$ ), +or the delimiter +preceded by +.LR \e . +.PP +A +.B charclass +is a nonempty string +.I s +bracketed +.BI [ \|s\| ] +(or +.BI [^ s\| ]\fR); +it matches any character in (or not in) +.IR s . +A negated character class never +matches newline. +A substring +.IB a - b\f1, +with +.I a +and +.I b +in ascending +order, stands for the inclusive +range of +characters between +.I a +and +.IR b . +In +.IR s , +the metacharacters +.LR - , +.LR ] , +an initial +.LR ^ , +and the regular expression delimiter +must be preceded by a +.LR \e ; +other metacharacters +have no special meaning and +may appear unescaped. +.PP +A +.L . +matches any character. +.PP +A +.L ^ +matches the beginning of a line; +.L $ +matches the end of the line. +.PP +The +.B REP +operators match zero or more +.RB ( * ), +one or more +.RB ( + ), +zero or one +.RB ( ? ), +instances respectively of the preceding regular expression +.BR e2 . +.PP +A concatenated regular expression, +.BR "e1\|e2" , +matches a match to +.B e1 +followed by a match to +.BR e2 . +.PP +An alternative regular expression, +.BR "e0\||\|e1" , +matches either a match to +.B e0 +or a match to +.BR e1 . +.PP +A match to any part of a regular expression +extends as far as possible without preventing +a match to the remainder of the regular expression. +.SH "SEE ALSO" +.IR regexp (3) -- 2.49.0