Mercurial > projects > pwm
view tok.c @ 42:fb995e5d54e9 version-1
Release version 1
author | Guido Berhoerster <guido+pwm@berhoerster.name> |
---|---|
date | Tue, 20 Aug 2019 21:26:55 +0200 |
parents | 722a45b4028b |
children |
line wrap: on
line source
/*
 * Copyright (C) 2017 Guido Berhoerster <guido+pwm@berhoerster.name>
 *
 * Permission is hereby granted, free of charge, to any person obtaining
 * a copy of this software and associated documentation files (the
 * "Software"), to deal in the Software without restriction, including
 * without limitation the rights to use, copy, modify, merge, publish,
 * distribute, sublicense, and/or sell copies of the Software, and to
 * permit persons to whom the Software is furnished to do so, subject to
 * the following conditions:
 *
 * The above copyright notice and this permission notice shall be included
 * in all copies or substantial portions of the Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
 * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
 * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
 * IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY
 * CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
 * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
 * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
 */

#include "compat.h"

#include <ctype.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>

#include "tok.h"
#include "util.h"

/* states of the tokenizer's state machine, see tok_tokenize() */
enum tok_states {
	STATE_INITIAL,		/* between tokens, skipping whitespace */
	STATE_IN_WORD,		/* inside an unquoted part of an argument */
	STATE_IN_QUOTE,		/* inside a quoted part of an argument */
	STATE_IN_WORD_ESCAPE,	/* directly after a backslash in a word */
	STATE_IN_QUOTE_ESCAPE,	/* directly after a backslash in a quote */
	STATE_IN_MACRO		/* inside a macro name, after the '$' */
};

/*
 * Append one character to the NUL-terminated string buffer *bufp, or reset
 * the buffer to the empty string if c is negative.
 *
 * bufp		in/out: the buffer, may point to NULL if *buf_sizep is 0, in
 *		which case a buffer is allocated
 * buf_sizep	in/out: the allocated size of *bufp in bytes
 * c		the character to append as a non-negative value, or a
 *		negative value (by convention -1) to reset the buffer
 *
 * The buffer is grown through xrealloc() when it is too small.  The current
 * string length is recomputed with strlen() on every call.
 *
 * Since any negative value of c means "reset", callers must convert plain
 * char values to unsigned char before passing them; otherwise bytes >= 0x80
 * would be mistaken for a reset request where char is a signed type.
 */
static inline void
strbuf_appendc(char **bufp, size_t *buf_sizep, int c)
{
	char	*buf = *bufp;
	size_t	buf_size = *buf_sizep;
	size_t	len;

	/* a reset (c < 0) discards the current contents, so len is 0 */
	len = ((buf != NULL) && (c >= 0)) ? strlen(buf) : 0;

	/* allocate buffer if *bufp is NULL and *buf_sizep is 0 */
	if (buf_size < len + (c >= 0) + 1) {
		/* at most one byte is added, so doubling always suffices */
		buf_size = (buf_size * 2 > BUFSIZ) ? buf_size * 2 : BUFSIZ;
		buf = xrealloc(buf, buf_size);
	}

	/* append character to string buffer or reset buffer if c is -1 */
	if (c >= 0) {
		buf[len++] = (char)c;
	}
	buf[len] = '\0';

	*bufp = buf;
	*buf_sizep = buf_size;
}

/*
 * Free a NULL-terminated token vector including all of its tokens and the
 * strings owned by them.
 *
 * tokenv	the vector to free, must be terminated by a NULL element;
 *		passing NULL is a no-op
 */
void
tok_free(union tok **tokenv)
{
	size_t	i;

	if (tokenv == NULL) {
		return;
	}

	for (i = 0; tokenv[i] != NULL; i++) {
		switch (tokenv[i]->any.type) {
		case TOK_MACRO:
			free(tokenv[i]->macro.name);
			break;
		case TOK_ARG:
			free(tokenv[i]->arg.value);
			break;
		default:
			/* no string is released for any other token type */
			break;
		}
		free(tokenv[i]);
	}
	free(tokenv);
}

/*
 * Split the string s into a vector of tokens.
 *
 * Tokens are separated by spaces, tabs and newlines.  A token starting with
 * '$' becomes a TOK_MACRO token whose name consists of the following ASCII
 * alphanumeric characters and underscores.  Any other token becomes a
 * TOK_ARG token; it may contain parts quoted with '"' or '\'' and
 * backslash escapes, both of which allow whitespace and quote characters
 * to be part of the value.
 *
 * s		the NUL-terminated input string
 * tokencp	out: number of tokens stored in *tokenvp
 * tokenvp	out: newly allocated, NULL-terminated token vector which
 *		must be released with tok_free()
 *
 * Returns TOK_ERR_OK on success, TOK_ERR_UNTERMINATED_QUOTE if a quote is
 * not closed, TOK_ERR_TRAILING_BACKSLASH if the input ends with a backslash
 * or TOK_ERR_INVALID_MACRO_NAME if a macro name contains an invalid
 * character.
 *
 * If the return value is negative the tokens collected so far are freed and
 * the output parameters are left untouched, otherwise the output parameters
 * are set.
 * NOTE(review): the numeric values of the TOK_ERR_* constants are defined
 * in tok.h; confirm there whether the parse errors above are negative,
 * i.e. which of the two cases applies to them.
 */
enum tok_err
tok_tokenize(const char *s, size_t *tokencp, union tok ***tokenvp)
{
	int		retval = TOK_ERR_OK;
	union tok	**tokenv;
	size_t		tokenc = 0;
	const char	*p = s;
	enum tok_states	state = STATE_INITIAL;
	char		quote = '\0';	/* only read in STATE_IN_QUOTE */
	char		*buf = NULL;
	size_t		buf_size = 0;
	char		*value;
	char		*name;

	/*
	 * allocate maximum number of tokens: ceil(length / 2), plus one
	 * element for the terminating NULL
	 */
	tokenv = xmalloc((((strlen(s) + 2 - 1) / 2) + 1) *
	    sizeof (union tok *));

	for (;;) {
		switch (state) {
		case STATE_INITIAL:
			switch (*p) {
			case ' ':	/* FALLTHROUGH */
			case '\t':	/* FALLTHROUGH */
			case '\n':
				/* skip initial whitespace */
				break;
			case '"':	/* FALLTHROUGH */
			case '\'':
				/* start quoted part of token */
				state = STATE_IN_QUOTE;
				quote = *p;
				strbuf_appendc(&buf, &buf_size, -1);
				break;
			case '\\':
				/* start token with a backslash escape */
				state = STATE_IN_WORD_ESCAPE;
				strbuf_appendc(&buf, &buf_size, -1);
				break;
			case '$':
				/* start macro token */
				state = STATE_IN_MACRO;
				strbuf_appendc(&buf, &buf_size, -1);
				break;
			case '\0':
				/* end of input */
				retval = TOK_ERR_OK;
				goto out;
			default:
				/* start token with a word */
				state = STATE_IN_WORD;
				strbuf_appendc(&buf, &buf_size, -1);
				/*
				 * convert to unsigned char so that bytes
				 * >= 0x80 are appended rather than taken
				 * for the negative reset sentinel
				 */
				strbuf_appendc(&buf, &buf_size,
				    (unsigned char)*p);
			}
			break;
		case STATE_IN_WORD:
			switch (*p) {
			case ' ':	/* FALLTHROUGH */
			case '\t':	/* FALLTHROUGH */
			case '\n':	/* FALLTHROUGH */
			case '\0':
				/* end of token */
				value = xstrdup(buf);
				tokenv[tokenc] = xmalloc(sizeof (union tok));
				tokenv[tokenc]->arg.type = TOK_ARG;
				tokenv[tokenc]->arg.value = value;
				tokenc++;
				if (*p == '\0') {
					retval = TOK_ERR_OK;
					goto out;
				}
				state = STATE_INITIAL;
				break;
			case '"':	/* FALLTHROUGH */
			case '\'':
				/* start quoted part of token */
				state = STATE_IN_QUOTE;
				quote = *p;
				break;
			case '\\':
				/* start backslash escape */
				state = STATE_IN_WORD_ESCAPE;
				break;
			default:
				/* regular character */
				strbuf_appendc(&buf, &buf_size,
				    (unsigned char)*p);
			}
			break;
		case STATE_IN_QUOTE:
			switch (*p) {
			case '"':	/* FALLTHROUGH */
			case '\'':
				if (*p == quote) {
					/* end quoted part of token */
					state = STATE_IN_WORD;
				} else {
					/*
					 * the other kind of quote character
					 * is taken literally
					 */
					strbuf_appendc(&buf, &buf_size,
					    (unsigned char)*p);
				}
				break;
			case '\\':
				/* start quoted backslash escape */
				state = STATE_IN_QUOTE_ESCAPE;
				break;
			case '\0':
				/* unclosed quote */
				retval = TOK_ERR_UNTERMINATED_QUOTE;
				goto out;
			default:
				/* regular character */
				strbuf_appendc(&buf, &buf_size,
				    (unsigned char)*p);
			}
			break;
		case STATE_IN_WORD_ESCAPE:	/* FALLTHROUGH */
		case STATE_IN_QUOTE_ESCAPE:
			if (*p == '\0') {
				/* trailing backslash */
				retval = TOK_ERR_TRAILING_BACKSLASH;
				goto out;
			}
			/* escaped character, return to the enclosing state */
			state = (state == STATE_IN_WORD_ESCAPE) ?
			    STATE_IN_WORD : STATE_IN_QUOTE;
			strbuf_appendc(&buf, &buf_size, (unsigned char)*p);
			break;
		case STATE_IN_MACRO:
			switch (*p) {
			case ' ':	/* FALLTHROUGH */
			case '\t':	/* FALLTHROUGH */
			case '\n':	/* FALLTHROUGH */
			case '\0':
				/* end of token */
				name = xstrdup(buf);
				tokenv[tokenc] = xmalloc(sizeof (union tok));
				tokenv[tokenc]->macro.type = TOK_MACRO;
				tokenv[tokenc]->macro.name = name;
				tokenc++;
				if (*p == '\0') {
					retval = TOK_ERR_OK;
					goto out;
				}
				state = STATE_INITIAL;
				break;
			default:
				/*
				 * macro names must only contain alphanumeric
				 * characters and underscores
				 */
				if (!isascii((unsigned char)*p) ||
				    (!isalnum((unsigned char)*p) &&
				    (*p != '_'))) {
					retval = TOK_ERR_INVALID_MACRO_NAME;
					goto out;
				}
				strbuf_appendc(&buf, &buf_size,
				    (unsigned char)*p);
			}
			break;
		}
		p++;
	}

out:
	/*
	 * Terminate the vector before it is either freed or handed out:
	 * tok_free() walks it up to the first NULL element and the memory
	 * obtained from xmalloc() above has not been initialized.
	 */
	tokenv[tokenc] = NULL;
	if (retval < 0) {
		tok_free(tokenv);
	} else {
		*tokencp = tokenc;
		/* shrink the vector to the number of tokens actually found */
		*tokenvp = xrealloc(tokenv, (tokenc + 1) *
		    sizeof (union tok *));
	}
	free(buf);

	return (retval);
}