Mercurial > projects > pwm
diff tok.c @ 0:a7e41e1a79c8
Initial revision
author | Guido Berhoerster <guido+pwm@berhoerster.name> |
---|---|
date | Thu, 19 Jan 2017 22:39:51 +0100 |
parents | |
children | 722a45b4028b |
line wrap: on
line diff
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/tok.c Thu Jan 19 22:39:51 2017 +0100 @@ -0,0 +1,246 @@ +/* + * Copyright (C) 2016 Guido Berhoerster <guido+pwm@berhoerster.name> + * + * Permission is hereby granted, free of charge, to any person obtaining + * a copy of this software and associated documentation files (the + * "Software"), to deal in the Software without restriction, including + * without limitation the rights to use, copy, modify, merge, publish, + * distribute, sublicense, and/or sell copies of the Software, and to + * permit persons to whom the Software is furnished to do so, subject to + * the following conditions: + * + * The above copyright notice and this permission notice shall be included + * in all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. + * IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY + * CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, + * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE + * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. + */ + +#include "compat.h" + +#include <errno.h> +#include <stdio.h> +#include <stdlib.h> +#include <string.h> + +#include "tok.h" + +enum tok_states { + STATE_INITIAL, + STATE_IN_WORD, + STATE_IN_QUOTE, + STATE_IN_WORD_ESCAPE, + STATE_IN_QUOTE_ESCAPE +}; + +static inline int +strbuf_appendc(char **bufp, size_t *buf_sizep, int c) +{ + char *buf = *bufp; + size_t buf_size = *buf_sizep; + size_t len; + + len = ((buf != NULL) && (c >= 0)) ? strlen(buf) : 0; + + /* allocate buffer if *bufp is NULL and *buf_sizep is 0 */ + if (buf_size < len + (c >= 0) + 1) { + buf_size = (buf_size * 2 > BUFSIZ) ? buf_size * 2 : BUFSIZ; + buf = realloc(buf, buf_size); + if (buf == NULL) { + return (-1); + } + } + + /* append character to string buffer or reset buffer if c is -1 */ + if (c >= 0) { + buf[len++] = c; + } + buf[len] = '\0'; + + *bufp = buf; + *buf_sizep = buf_size; + + return (0); +} + +enum tok_err +tok_tokenize(const char *s, int *tokencp, char ***tokenvp) +{ + int retval = TOK_ERR_SYSTEM_ERROR; + int saved_errno = 0; + char **tokenv; + size_t tokenc = 0; + const char *p = s; + enum tok_states state = STATE_INITIAL; + char quote; + char *buf = NULL; + size_t buf_size = 0; + char *token; + size_t i; + + /* + * allocate maximum number of tokens including the terminating NULL + * pointer: ceil(length / 2) + 1 + */ + tokenv = malloc(((strlen(s) + 2 - 1) / 2 + 1) * sizeof (char *)); + if (tokenv == NULL) { + saved_errno = errno; + goto out; + } + tokenv[0] = NULL; + + for (;;) { + switch (state) { + case STATE_INITIAL: + switch (*p) { + case ' ': /* FALLTHROUGH */ + case '\t': /* FALLTHROUGH */ + case '\n': + /* skip initial whitespace */ + break; + case '"': /* FALLTHROUGH */ + case '\'': + /* start quoted part of token */ + state = STATE_IN_QUOTE; + quote = *p; + if (strbuf_appendc(&buf, &buf_size, -1) != 0) { + saved_errno = errno; + goto out; + } + break; + case '\\': + /* start token with a backslash escape */ + state = STATE_IN_WORD_ESCAPE; + if (strbuf_appendc(&buf, &buf_size, -1) != 0) { + saved_errno = errno; + goto out; + } + break; + case '\0': + /* end of input */ + retval = 0; + goto out; + default: + /* start token with a word */ + state = STATE_IN_WORD; + if (strbuf_appendc(&buf, &buf_size, -1) != 0) { + saved_errno = errno; + goto out; + } + if (strbuf_appendc(&buf, &buf_size, *p) != 0) { + saved_errno = errno; + goto out; + } + } + break; + case STATE_IN_WORD: + switch (*p) { + case ' ': /* FALLTHROUGH */ + case '\t': /* FALLTHROUGH */ + case '\n': /* FALLTHROUGH */ + case '\0': + /* end of token */ + token = strdup(buf); + if (token == NULL) { + saved_errno = errno; + goto out; + } + tokenv[tokenc++] = token; + tokenv[tokenc] = NULL; + if (*p == '\0') { + retval = 0; + goto out; + } + state = STATE_INITIAL; + break; + case '"': /* FALLTHROUGH */ + case '\'': + /* start quoted part of token */ + state = STATE_IN_QUOTE; + quote = *p; + break; + case '\\': + /* start backslash escape */ + state = STATE_IN_WORD_ESCAPE; + break; + default: + /* regular character */ + if (strbuf_appendc(&buf, &buf_size, *p) != 0) { + saved_errno = errno; + goto out; + } + } + break; + case STATE_IN_QUOTE: + switch (*p) { + case '"': /* FALLTHROUGH */ + case '\'': + if (*p == quote) { + /* end quoted part of token */ + state = STATE_IN_WORD; + } else { + /* quote quote character */ + if (strbuf_appendc(&buf, &buf_size, + *p) != 0) { + saved_errno = errno; + goto out; + } + } + break; + case '\\': + /* start quoted backslash escape */ + state = STATE_IN_QUOTE_ESCAPE; + break; + case '\0': + /* unclosed quote */ + retval = TOK_ERR_UNTERMINATED_QUOTE; + goto out; + default: + /* regular character */ + if (strbuf_appendc(&buf, &buf_size, *p) != 0) { + saved_errno = errno; + goto out; + } + } + break; + case STATE_IN_WORD_ESCAPE: /* FALLTHROUGH */ + case STATE_IN_QUOTE_ESCAPE: + if (*p == '\0') { + /* trailing backslash */ + retval = TOK_ERR_TRAILING_BACKSLASH; + goto out; + } + /* escaped character */ + state = (state == STATE_IN_WORD_ESCAPE) ? + STATE_IN_WORD : STATE_IN_QUOTE; + if (strbuf_appendc(&buf, &buf_size, *p) != 0) { + saved_errno = errno; + goto out; + } + break; + } + p++; + } + +out: + if (retval < 0) { + for (i = 0; i < tokenc; i++) { + free(tokenv[i]); + } + free(tokenv); + } else { + *tokencp = tokenc; + *tokenvp = realloc(tokenv, (tokenc + 1) * sizeof (char *)); + } + free(buf); + if (retval < 0) { + errno = saved_errno; + } + + return (retval); +}