Add scripts/find-banned.sh

author Johan Malm <jgm323@gmail.com>

Mon, 30 Jan 2023 21:24:52 +0000 (21:24 +0000)

committer Johan Malm <johanmalm@users.noreply.github.com>

Thu, 6 Jul 2023 17:04:55 +0000 (18:04 +0100)
author Johan Malm <jgm323@gmail.com>
Mon, 30 Jan 2023 21:24:52 +0000 (21:24 +0000)
committer Johan Malm <johanmalm@users.noreply.github.com>
Thu, 6 Jul 2023 17:04:55 +0000 (18:04 +0100)
diff --git a/scripts/.gitignore b/scripts/.gitignore

new file mode 100644 (file)

index 0000000..ed76aa7
--- /dev/null
+++ b/scripts/.gitignore
@@ -0,0 +1,2 @@
+*.o
+find-banned
diff --git a/scripts/find-banned.sh b/scripts/find-banned.sh

new file mode 100755 (executable)

index 0000000..25e915f
--- /dev/null
+++ b/scripts/find-banned.sh
@@ -0,0 +1,6 @@
+#!/bin/sh
+
+banned="malloc,g_strcmp0,sprintf,vsprintf,strcpy,strncpy,strcat,strncat"
+
+find src/ include/ \( -name "*.c" -o -name "*.h" \) -type f \
+       | ./scripts/helper/find-idents --tokens=$banned -
diff --git a/scripts/helper/Makefile b/scripts/helper/Makefile

new file mode 100644 (file)

index 0000000..e04f063
--- /dev/null
+++ b/scripts/helper/Makefile
@@ -0,0 +1,12 @@
+# Builds the helper program(s) listed in PROGS.
+CFLAGS += -g -Wall -O0 -std=c11
+LDFLAGS += -fsanitize=address
+
+PROGS = find-idents
+
+# Neither target names a file, so never treat a stray file as up to date
+.PHONY: all clean
+
+all: $(PROGS)
+
+# This explicit rule replaces make's built-in link rule, so LDFLAGS has to be
+# passed by hand; otherwise the flags set above never reach the linker.
+find-idents: find-idents.o
+       $(CC) $(LDFLAGS) -o $@ $^
+
+clean :
+       $(RM) $(PROGS) *.o
diff --git a/scripts/helper/find-idents.c b/scripts/helper/find-idents.c

new file mode 100644 (file)

index 0000000..69556ae
--- /dev/null
+++ b/scripts/helper/find-idents.c
@@ -0,0 +1,405 @@
+// SPDX-License-Identifier: GPL-2.0-only
+/*
+ * Helper to find identifier names in C files
+ *
+ * Copyright (C) Johan Malm 2023
+ *
+ * It tokenizes the specified C file and searches all identifier-tokens against
+ * the specified patterns.
+ *
+ * An identifier in this context is any alphanumeric/underscore string starting
+ * with a letter [A-Za-z] or underscore. It represents entities such as
+ * functions, variables, user-defined data types and C language keywords.
+ * Alphanumeric strings within comments are ignored, but no parsing of tokens
+ * is carried out to understand their semantic meaning.
+ */
+
+#define _POSIX_C_SOURCE 200809L
+#include <assert.h>
+#include <ctype.h>
+#include <stdbool.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+
+/* Growable, NUL-terminated character buffer used by the buf_*() helpers */
+struct buf {
+       /* Heap storage; kept NUL-terminated by buf_init/buf_add/buf_add_char */
+       char *buf;
+       /* Number of bytes currently allocated for buf */
+       int alloc;
+       /* Number of bytes in use, not counting the NUL terminator */
+       int len;
+};
+
+/*
+ * Category of a lexed token.
+ *
+ * TOKEN_NONE is zero so that a zero-filled token can act as the end marker of
+ * the token array: lex() appends one and grep() stops when it reaches it.
+ */
+enum token_kind {
+       TOKEN_NONE = 0,
+       TOKEN_IDENTIFIER, /* For example: static extern if while */
+       TOKEN_LITERAL, /* For example: 0xff 42 "foo" */
+       TOKEN_SPECIAL, /* For example: ++ -= ! ... */
+};
+
+/* One lexed token; instances live in the global tokens array */
+struct token {
+       /* Value of current_line at the time the token was created */
+       int line;
+       /* Set by lex() once the token text has been read */
+       enum token_kind kind;
+       /* Text of the token */
+       struct buf name;
+       /* For TOKEN_SPECIAL: code taken from the specials table, otherwise 0 */
+       unsigned int special;
+};
+
+/*
+ * Codes for multi-character special tokens.
+ *
+ * Single-character specials use the character itself as their code (see the
+ * specials table). Numbering starts at 256, presumably so that these codes
+ * cannot collide with any single-character code.
+ */
+enum {
+       SPECIAL_ELLIPSIS = 256,
+       SPECIAL_ASSIGN,
+       SPECIAL_BIT_OP,
+       SPECIAL_INC_OP,
+       SPECIAL_DEC_OP,
+       SPECIAL_PTR_OP,
+       SPECIAL_AND_OP,
+       SPECIAL_OR_OP,
+       SPECIAL_COMPARISON_OP,
+       SPECIAL_COMMENT_BEGIN,
+       SPECIAL_COMMENT_END,
+};
+
+/* Lexer state shared by lex() and its helpers */
+
+/* Read position inside the text handed to lex() */
+static char *current_buffer_position;
+/* Token array built by add_token(); reset at the start of lex() */
+static struct token *tokens;
+/* Number of tokens in use and number of tokens allocated */
+static int nr_tokens, alloc_tokens;
+/* 1-based line number of the read position; copied into each new token */
+static int current_line = 1;
+
+/*
+ * Initialise @s as an empty string with an initial capacity of 256 bytes.
+ *
+ * The storage is heap-allocated and owned by the caller, who releases it with
+ * free(s->buf). The program exits if the allocation fails, because no caller
+ * has a way to recover from that.
+ */
+void
+buf_init(struct buf *s)
+{
+       s->alloc = 256;
+       s->len = 0;
+       s->buf = malloc(s->alloc);
+       if (!s->buf) {
+               perror("malloc");
+               exit(EXIT_FAILURE);
+       }
+       s->buf[0] = '\0';
+}
+
+/*
+ * Append the first @len bytes of @data to @s and keep @s NUL-terminated.
+ *
+ * Nothing is appended if @data is NULL or starts with a NUL byte. The program
+ * exits if the buffer cannot be enlarged.
+ */
+void
+buf_add(struct buf *s, const char *data, size_t len)
+{
+       if (!data || data[0] == '\0') {
+               return;
+       }
+
+       /* Room for the existing text, the new bytes and the terminator */
+       size_t needed = (size_t)s->len + len + 1;
+       if ((size_t)s->alloc < needed) {
+               /*
+                * Grow geometrically so that appending a file line by line
+                * does not reallocate on every call.
+                */
+               size_t new_alloc = (size_t)s->alloc * 2;
+               if (new_alloc < needed) {
+                       new_alloc = needed;
+               }
+               /* Keep the old pointer until realloc is known to succeed */
+               char *grown = realloc(s->buf, new_alloc);
+               if (!grown) {
+                       perror("realloc");
+                       exit(EXIT_FAILURE);
+               }
+               s->buf = grown;
+               s->alloc = new_alloc;
+       }
+
+       memcpy(s->buf + s->len, data, len);
+       s->len += len;
+       s->buf[s->len] = '\0';
+}
+
+/*
+ * Append the single character @ch to @s and keep @s NUL-terminated.
+ *
+ * The program exits if the buffer cannot be enlarged.
+ */
+void
+buf_add_char(struct buf *s, char ch)
+{
+       /* One byte for @ch plus one for the terminator must fit */
+       if (s->alloc <= s->len + 1) {
+               int new_alloc = s->alloc * 2 + 16;
+               /* Keep the old pointer until realloc is known to succeed */
+               char *grown = realloc(s->buf, new_alloc);
+               if (!grown) {
+                       perror("realloc");
+                       exit(EXIT_FAILURE);
+               }
+               s->buf = grown;
+               s->alloc = new_alloc;
+       }
+       s->buf[s->len++] = ch;
+       s->buf[s->len] = '\0';
+}
+
+/*
+ * Append a zero-initialised token to the global token array and return it.
+ *
+ * The new token gets an empty name buffer and the current line number. The
+ * returned pointer is only valid until the next call, because growing the
+ * array may move it. The program exits if the array cannot be enlarged.
+ */
+static struct token *
+add_token(void)
+{
+       if (nr_tokens == alloc_tokens) {
+               int new_alloc = (alloc_tokens + 16) * 2;
+               /* Keep the old array until realloc is known to succeed */
+               struct token *grown =
+                       realloc(tokens, new_alloc * sizeof(struct token));
+               if (!grown) {
+                       perror("realloc");
+                       exit(EXIT_FAILURE);
+               }
+               tokens = grown;
+               alloc_tokens = new_alloc;
+       }
+       struct token *token = tokens + nr_tokens;
+       memset(token, 0, sizeof(*token));
+       nr_tokens++;
+       buf_init(&token->name);
+       token->line = current_line;
+       return token;
+}
+
+/*
+ * Skip a run of whitespace, counting every newline in current_line.
+ *
+ * The character at the read position is consumed unconditionally, so callers
+ * only invoke this when it is known to be whitespace. @token is unused.
+ *
+ * The run is skipped with a loop rather than one recursive call per
+ * character, so long runs of blanks cannot exhaust the stack.
+ */
+static void
+handle_whitespace(struct token *token)
+{
+       (void)token;
+       do {
+               if (current_buffer_position[0] == '\n') {
+                       ++current_line;
+               }
+               current_buffer_position++;
+               /* <ctype.h> functions are only defined for unsigned char values */
+       } while (isspace((unsigned char)current_buffer_position[0]));
+}
+
+/*
+ * Read an identifier starting at the read position into @token->name.
+ *
+ * The first character is taken as-is. Following characters are added while
+ * they are ASCII letters, digits, '_' or '#'. Whitespace directly after the
+ * identifier is consumed as well.
+ *
+ * Implemented as a loop rather than one recursive call per character, so very
+ * long identifiers cannot exhaust the stack.
+ */
+static void
+get_identifier_token(struct token *token)
+{
+       for (;;) {
+               buf_add_char(&token->name, current_buffer_position[0]);
+               current_buffer_position++;
+
+               const char c = current_buffer_position[0];
+               /* <ctype.h> functions are only defined for unsigned char values */
+               if (isspace((unsigned char)c)) {
+                       handle_whitespace(token);
+                       return;
+               }
+               bool is_ident_char = (c >= 'a' && c <= 'z')
+                       || (c >= 'A' && c <= 'Z')
+                       || (c >= '0' && c <= '9')
+                       || c == '_' || c == '#';
+               if (!is_ident_char) {
+                       /* Also reached at the NUL that ends the input */
+                       return;
+               }
+       }
+}
+
+/*
+ * Read a numeric literal starting at the read position into @token->name.
+ *
+ * The first character is taken as-is. Following characters are added while
+ * they are decimal or hexadecimal digits or the letter 'x'. Whitespace
+ * directly after the literal is consumed as well.
+ *
+ * Implemented as a loop rather than one recursive call per character.
+ */
+static void
+get_number_token(struct token *token)
+{
+       for (;;) {
+               buf_add_char(&token->name, current_buffer_position[0]);
+               current_buffer_position++;
+
+               const char c = current_buffer_position[0];
+               /* <ctype.h> functions are only defined for unsigned char values */
+               if (isspace((unsigned char)c)) {
+                       handle_whitespace(token);
+                       return;
+               }
+               bool is_number_char = (c >= '0' && c <= '9')
+                       || (c >= 'a' && c <= 'f')
+                       || (c >= 'A' && c <= 'F')
+                       || c == 'x';
+               if (!is_number_char) {
+                       /* Also reached at the NUL that ends the input */
+                       return;
+               }
+       }
+}
+
+/*
+ * Table of operators and punctuation recognised by get_special_token().
+ *
+ * Each entry maps the token text to the code stored in token->special:
+ * a SPECIAL_* value for multi-character tokens, the character itself for
+ * single-character ones.
+ *
+ * The entries must stay ordered from longest to shortest text, because
+ * get_special_token() stops scanning as soon as it meets an entry that is
+ * shorter than the candidate it is looking for.
+ */
+struct {
+       const char *combo;
+       unsigned int special;
+} specials[] = {
+       { "...", SPECIAL_ELLIPSIS },
+       { ">>=", SPECIAL_ASSIGN },
+       { "<<=", SPECIAL_ASSIGN },
+       { "+=", SPECIAL_ASSIGN },
+       { "-=", SPECIAL_ASSIGN },
+       { "*=", SPECIAL_ASSIGN },
+       { "/=", SPECIAL_ASSIGN },
+       { "%=", SPECIAL_ASSIGN },
+       { "&=", SPECIAL_ASSIGN },
+       { "^=", SPECIAL_ASSIGN },
+       { "|=", SPECIAL_ASSIGN },
+       { ">>", SPECIAL_BIT_OP },
+       { "<<", SPECIAL_BIT_OP },
+       { "++", SPECIAL_INC_OP },
+       { "--", SPECIAL_DEC_OP },
+       { "->", SPECIAL_PTR_OP },
+       { "&&", SPECIAL_AND_OP },
+       { "||", SPECIAL_OR_OP },
+       { "<=", SPECIAL_COMPARISON_OP },
+       { ">=", SPECIAL_COMPARISON_OP },
+       { "==", SPECIAL_COMPARISON_OP },
+       { "!=", SPECIAL_COMPARISON_OP },
+       { "/*", SPECIAL_COMMENT_BEGIN },
+       { "*/", SPECIAL_COMMENT_END },
+       { ";", ';' },
+       { "{", '{' },
+       { "}", '}' },
+       { ",", ',' },
+       { ":", ':' },
+       { "=", '=' },
+       { "(", '(' },
+       { ")", ')' },
+       { "[", '[' },
+       { "]", ']' },
+       { ".", '.' },
+       { "&", '&' },
+       { "!", '!' },
+       { "~", '~' },
+       { "-", '-' },
+       { "+", '+' },
+       { "*", '*' },
+       { "/", '/' },
+       { "%", '%' },
+       { "<", '<' },
+       { ">", '>' },
+       { "^", '^' },
+       { "|", '|' },
+       { "?", '?' },
+};
+
+/*
+ * Read an operator or punctuation token at the read position into @token.
+ *
+ * Up to three characters are looked at and the longest entry of the specials
+ * table that matches is taken, so ">>=" wins over ">>" and ">". The matched
+ * text is stored in @token->name and its code in @token->special. Whitespace
+ * directly after the token is consumed as well.
+ */
+static void
+get_special_token(struct token *token)
+{
+       enum { MAX_SPECIAL_LEN = 3 };
+       const size_t nr_specials = sizeof(specials) / sizeof(specials[0]);
+
+       /* Copy up to MAX_SPECIAL_LEN characters, stopping at the end of input */
+       char buf[MAX_SPECIAL_LEN + 1] = { 0 };
+       for (int i = 0; i < MAX_SPECIAL_LEN; i++) {
+               if (!current_buffer_position[i]) {
+                       break;
+               }
+               buf[i] = current_buffer_position[i];
+       }
+
+       /* Try the longest candidate first, then drop its last character */
+       for (size_t k = strlen(buf); k > 0; k--) {
+               /* Bound the scan by the table size so it cannot run off the end */
+               for (size_t j = 0; j < nr_specials; j++) {
+                       if (strlen(specials[j].combo) < k) {
+                               /* The table is sorted by length: no match left */
+                               break;
+                       }
+                       if (!strcmp(specials[j].combo, buf)) {
+                               buf_add(&token->name, buf, k);
+                               token->special = specials[j].special;
+                               goto done;
+                       }
+               }
+               buf[k - 1] = '\0';
+       }
+
+       /*
+        * No table entry matched. Still consume one character so that the
+        * caller's loop always makes progress.
+        */
+       if (current_buffer_position[0] != '\0') {
+               buf_add_char(&token->name, current_buffer_position[0]);
+               token->special = (unsigned char)current_buffer_position[0];
+       }
+done:
+       current_buffer_position += token->name.len;
+       /* <ctype.h> functions are only defined for unsigned char values */
+       if (isspace((unsigned char)current_buffer_position[0])) {
+               handle_whitespace(token);
+       }
+}
+
+/*
+ * Skip the remainder of a preprocessor line; its content is not tokenized.
+ *
+ * Advances the read position until it rests on a newline or on the NUL that
+ * ends the input, and leaves it there. A newline is counted in current_line
+ * here. Backslash line continuations get no special treatment.
+ */
+static void
+handle_preprocessor_directive(void)
+{
+       char c;
+
+       do {
+               c = *++current_buffer_position;
+       } while (c != '\0' && c != '\n');
+
+       if (c == '\n') {
+               ++current_line;
+       }
+}
+
+/*
+ * Tokenize the NUL-terminated C source text in @buffer.
+ *
+ * Returns the global token array. Its last element is an end marker whose
+ * kind is TOKEN_NONE. Preprocessor lines are skipped, and any character that
+ * cannot start a token is ignored.
+ */
+struct token *
+lex(char *buffer)
+{
+       tokens = NULL;
+       nr_tokens = 0;
+       alloc_tokens = 0;
+
+       current_buffer_position = buffer;
+
+       for (;;) {
+               struct token *token = NULL;
+               switch (current_buffer_position[0]) {
+               case '\0':
+                       goto out;
+               case 'a' ... 'z':
+               case 'A' ... 'Z':
+               case '_':
+                       token = add_token();
+                       get_identifier_token(token);
+                       token->kind = TOKEN_IDENTIFIER;
+                       continue;
+               case '0' ... '9':
+                       token = add_token();
+                       get_number_token(token);
+                       token->kind = TOKEN_LITERAL;
+                       continue;
+               case '+': case '-': case '*': case '/': case '%': case '.':
+               case '>': case '<': case '=': case '!': case '&': case '|':
+               case '^': case '{': case '}': case '(': case ')': case ',':
+               case ';': case ':': case '[': case ']': case '~': case '?':
+                       token = add_token();
+                       get_special_token(token);
+                       token->kind = TOKEN_SPECIAL;
+                       continue;
+               case '#':
+                       handle_preprocessor_directive();
+                       /*
+                        * A directive on the last line of a file that has no
+                        * trailing newline leaves us on the terminating NUL.
+                        * Stop here instead of stepping past the end of the
+                        * buffer.
+                        */
+                       if (current_buffer_position[0] == '\0') {
+                               goto out;
+                       }
+                       /* Otherwise we are on the newline, already counted */
+                       break;
+               case '\n':
+                       ++current_line;
+                       break;
+               default:
+                       break;
+               }
+               ++current_buffer_position;
+       }
+out:
+       add_token(); /* end marker */
+       return tokens;
+}
+
+/*
+ * Read the text file @filename into memory.
+ *
+ * Returns a NUL-terminated heap string holding the file's lines in order; the
+ * caller releases it with free(). Returns NULL, after printing a warning to
+ * stderr, if the file cannot be opened.
+ */
+char *
+read_file(const char *filename)
+{
+       FILE *stream = fopen(filename, "r");
+       if (stream == NULL) {
+               fprintf(stderr, "warn: cannot read '%s'\n", filename);
+               return NULL;
+       }
+
+       struct buf contents;
+       buf_init(&contents);
+
+       /* getline() (re)allocates the line buffer; it is released once below */
+       char *line = NULL;
+       size_t capacity = 0;
+       for (;;) {
+               if (getline(&line, &capacity, stream) == -1) {
+                       break;
+               }
+               buf_add(&contents, line, strlen(line));
+       }
+
+       free(line);
+       fclose(stream);
+       return contents.buf;
+}
+
+/*
+ * Print every identifier token outside a comment whose text equals @pattern.
+ *
+ * @tokens is an array ending with a TOKEN_NONE element. If @pattern is NULL,
+ * every identifier outside a comment is printed. Each hit is written to
+ * stdout as "<line>:<TAB><identifier>".
+ *
+ * Returns true if at least one identifier was printed.
+ */
+static bool
+grep(struct token *tokens, const char *pattern)
+{
+       bool inside_comment = false;
+       bool any_hit = false;
+
+       for (const struct token *cur = tokens; cur->kind != TOKEN_NONE; cur++) {
+               if (cur->kind == TOKEN_SPECIAL) {
+                       /* Comment delimiters only toggle state; never printed */
+                       switch (cur->special) {
+                       case SPECIAL_COMMENT_BEGIN:
+                               inside_comment = true;
+                               break;
+                       case SPECIAL_COMMENT_END:
+                               inside_comment = false;
+                               break;
+                       default:
+                               break;
+                       }
+                       continue;
+               }
+               if (inside_comment || cur->kind != TOKEN_IDENTIFIER) {
+                       continue;
+               }
+               if (pattern != NULL && strcmp(cur->name.buf, pattern) != 0) {
+                       continue;
+               }
+               any_hit = true;
+               printf("%d:\t%s\n", cur->line, cur->name.buf);
+       }
+       return any_hit;
+}
+
+/*
+ * Entry point: find-idents <file> [<patterns>...]
+ *
+ * With only <file>, every identifier in the file is listed and the exit
+ * status is 0. With patterns, each one is searched for in turn and the exit
+ * status is 1 if any of them was found, 0 otherwise. A missing argument or an
+ * unreadable file yields EXIT_FAILURE.
+ */
+int
+main(int argc, char **argv)
+{
+       if (argc < 2) {
+               fprintf(stderr, "usage: %s <file> [<patterns>...]\n", argv[0]);
+               return EXIT_FAILURE;
+       }
+
+       char *source = read_file(argv[1]);
+       if (source == NULL) {
+               return EXIT_FAILURE;
+       }
+       /* The tokens hold their own copies of the text */
+       struct token *token_list = lex(source);
+       free(source);
+
+       if (argc == 2) {
+               /* No patterns given: dump every identifier */
+               grep(token_list, NULL);
+               return 0;
+       }
+
+       bool any_banned = false;
+       for (char **pattern = argv + 2; pattern < argv + argc; pattern++) {
+               /* Search for every pattern so that all hits are printed */
+               if (grep(token_list, *pattern)) {
+                       any_banned = true;
+               }
+       }
+
+       /* Exit status 1 signals that a banned identifier was found */
+       return any_banned ? 1 : 0;
+}
author	Johan Malm <jgm323@gmail.com>
	Mon, 30 Jan 2023 21:24:52 +0000 (21:24 +0000)
committer	Johan Malm <johanmalm@users.noreply.github.com>
	Thu, 6 Jul 2023 17:04:55 +0000 (18:04 +0100)
scripts/.gitignore	[new file with mode: 0644]	patch \| blob
scripts/find-banned.sh	[new file with mode: 0755]	patch \| blob
scripts/helper/Makefile	[new file with mode: 0644]	patch \| blob
scripts/helper/find-idents.c	[new file with mode: 0644]	patch \| blob