Mercurial > projects > pwm
view tok.c @ 25:616385fa1fd9
Build common functions as a library that can be reused
author | Guido Berhoerster <guido+pwm@berhoerster.name> |
---|---|
date | Wed, 20 Sep 2017 23:57:51 +0200 |
parents | a7e41e1a79c8 |
children | 722a45b4028b |
line wrap: on
line source
/*
 * Copyright (C) 2016 Guido Berhoerster <guido+pwm@berhoerster.name>
 *
 * Permission is hereby granted, free of charge, to any person obtaining
 * a copy of this software and associated documentation files (the
 * "Software"), to deal in the Software without restriction, including
 * without limitation the rights to use, copy, modify, merge, publish,
 * distribute, sublicense, and/or sell copies of the Software, and to
 * permit persons to whom the Software is furnished to do so, subject to
 * the following conditions:
 *
 * The above copyright notice and this permission notice shall be included
 * in all copies or substantial portions of the Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
 * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
 * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
 * IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY
 * CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
 * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
 * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
 */

#include "compat.h"

#include <errno.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>

#include "tok.h"

/* states of the tokenizer's state machine */
enum tok_states {
	STATE_INITIAL,		/* between tokens */
	STATE_IN_WORD,		/* inside an unquoted part of a token */
	STATE_IN_QUOTE,		/* inside a quoted part of a token */
	STATE_IN_WORD_ESCAPE,	/* after a backslash outside of quotes */
	STATE_IN_QUOTE_ESCAPE	/* after a backslash inside of quotes */
};

/*
 * Append the character c to the NUL-terminated string in *bufp, or reset
 * the string to the empty string if c is negative.
 *
 * *bufp may be NULL with *buf_sizep set to 0, in which case a buffer is
 * allocated.  Whenever the buffer is too small it is grown with realloc()
 * to the larger of twice its current size and BUFSIZ.
 *
 * Since a negative c is the reset request, callers must convert characters
 * to unsigned char before passing them; a plain char may be negative for
 * bytes with the high bit set and would otherwise be mistaken for a reset.
 *
 * Returns 0 on success and -1 if realloc() fails, in which case *bufp and
 * *buf_sizep are left untouched.
 */
static inline int
strbuf_appendc(char **bufp, size_t *buf_sizep, int c)
{
	char	*buf = *bufp;
	size_t	buf_size = *buf_sizep;
	size_t	len;

	/* a reset always writes the terminator at offset 0 */
	len = ((buf != NULL) && (c >= 0)) ? strlen(buf) : 0;

	/* allocate buffer if *bufp is NULL and *buf_sizep is 0 */
	if (buf_size < len + (c >= 0) + 1) {
		buf_size = (buf_size * 2 > BUFSIZ) ? buf_size * 2 : BUFSIZ;
		/* only the local copy is overwritten, *bufp stays valid */
		buf = realloc(buf, buf_size);
		if (buf == NULL) {
			return (-1);
		}
	}

	/* append character to string buffer or reset buffer if c is -1 */
	if (c >= 0) {
		buf[len++] = (char)c;
	}
	buf[len] = '\0';

	*bufp = buf;
	*buf_sizep = buf_size;

	return (0);
}

/*
 * Split the string s into tokens.
 *
 * Tokens are separated by spaces, tabs and newlines.  Parts of a token may
 * be enclosed in single or double quotes, in which case whitespace and the
 * other kind of quote character are taken literally.  A backslash, inside
 * or outside of quotes, makes the following character part of the token.
 *
 * On success 0 is returned, the number of tokens is stored in *tokencp and
 * a NULL-terminated vector of the tokens is stored in *tokenvp.  The vector
 * comes from malloc()/realloc() and each token from strdup(); nothing in
 * this function frees them on that path.
 *
 * On failure one of the following is returned:
 *   TOK_ERR_SYSTEM_ERROR		an allocation failed
 *   TOK_ERR_UNTERMINATED_QUOTE	the input ended inside of a quote
 *   TOK_ERR_TRAILING_BACKSLASH	the input ended directly after a backslash
 *
 * When the returned value is negative all intermediate results are freed,
 * *tokencp and *tokenvp are not written and errno is set to the value saved
 * at the point of failure (0 if none was saved).
 *
 * NOTE(review): for any non-negative return value, which may include some
 * of the error codes depending on how enum tok_err is defined in tok.h, the
 * tokens collected so far are stored in *tokencp and *tokenvp -- confirm
 * against tok.h and the callers.
 */
enum tok_err
tok_tokenize(const char *s, int *tokencp, char ***tokenvp)
{
	int		retval = TOK_ERR_SYSTEM_ERROR;
	int		saved_errno = 0;
	char		**tokenv;
	char		**shrunk;
	size_t		tokenc = 0;
	const char	*p = s;
	enum tok_states	state = STATE_INITIAL;
	char		quote = '\0';
	char		*buf = NULL;
	size_t		buf_size = 0;
	char		*token;
	size_t		i;

	/*
	 * allocate maximum number of tokens including the terminating NULL
	 * pointer: ceil(length / 2) + 1
	 */
	tokenv = malloc(((strlen(s) + 2 - 1) / 2 + 1) * sizeof (char *));
	if (tokenv == NULL) {
		saved_errno = errno;
		goto out;
	}
	tokenv[0] = NULL;

	for (;;) {
		switch (state) {
		case STATE_INITIAL:
			switch (*p) {
			case ' ':	/* FALLTHROUGH */
			case '\t':	/* FALLTHROUGH */
			case '\n':
				/* skip initial whitespace */
				break;
			case '"':	/* FALLTHROUGH */
			case '\'':
				/* start quoted part of token */
				state = STATE_IN_QUOTE;
				quote = *p;
				if (strbuf_appendc(&buf, &buf_size, -1) != 0) {
					saved_errno = errno;
					goto out;
				}
				break;
			case '\\':
				/* start token with a backslash escape */
				state = STATE_IN_WORD_ESCAPE;
				if (strbuf_appendc(&buf, &buf_size, -1) != 0) {
					saved_errno = errno;
					goto out;
				}
				break;
			case '\0':
				/* end of input */
				retval = 0;
				goto out;
			default:
				/* start token with a word */
				state = STATE_IN_WORD;
				if (strbuf_appendc(&buf, &buf_size, -1) != 0) {
					saved_errno = errno;
					goto out;
				}
				if (strbuf_appendc(&buf, &buf_size,
				    (unsigned char)*p) != 0) {
					saved_errno = errno;
					goto out;
				}
			}
			break;
		case STATE_IN_WORD:
			switch (*p) {
			case ' ':	/* FALLTHROUGH */
			case '\t':	/* FALLTHROUGH */
			case '\n':	/* FALLTHROUGH */
			case '\0':
				/* end of token */
				token = strdup(buf);
				if (token == NULL) {
					saved_errno = errno;
					goto out;
				}
				/* keep the vector NULL-terminated at all times */
				tokenv[tokenc++] = token;
				tokenv[tokenc] = NULL;
				if (*p == '\0') {
					retval = 0;
					goto out;
				}
				state = STATE_INITIAL;
				break;
			case '"':	/* FALLTHROUGH */
			case '\'':
				/* start quoted part of token */
				state = STATE_IN_QUOTE;
				quote = *p;
				break;
			case '\\':
				/* start backslash escape */
				state = STATE_IN_WORD_ESCAPE;
				break;
			default:
				/* regular character */
				if (strbuf_appendc(&buf, &buf_size,
				    (unsigned char)*p) != 0) {
					saved_errno = errno;
					goto out;
				}
			}
			break;
		case STATE_IN_QUOTE:
			switch (*p) {
			case '"':	/* FALLTHROUGH */
			case '\'':
				if (*p == quote) {
					/* end quoted part of token */
					state = STATE_IN_WORD;
				} else {
					/* quote quote character */
					if (strbuf_appendc(&buf, &buf_size,
					    (unsigned char)*p) != 0) {
						saved_errno = errno;
						goto out;
					}
				}
				break;
			case '\\':
				/* start quoted backslash escape */
				state = STATE_IN_QUOTE_ESCAPE;
				break;
			case '\0':
				/* unclosed quote */
				retval = TOK_ERR_UNTERMINATED_QUOTE;
				goto out;
			default:
				/* regular character */
				if (strbuf_appendc(&buf, &buf_size,
				    (unsigned char)*p) != 0) {
					saved_errno = errno;
					goto out;
				}
			}
			break;
		case STATE_IN_WORD_ESCAPE:	/* FALLTHROUGH */
		case STATE_IN_QUOTE_ESCAPE:
			if (*p == '\0') {
				/* trailing backslash */
				retval = TOK_ERR_TRAILING_BACKSLASH;
				goto out;
			}
			/* escaped character */
			state = (state == STATE_IN_WORD_ESCAPE) ?
			    STATE_IN_WORD : STATE_IN_QUOTE;
			if (strbuf_appendc(&buf, &buf_size,
			    (unsigned char)*p) != 0) {
				saved_errno = errno;
				goto out;
			}
			break;
		}
		p++;
	}

out:
	if (retval < 0) {
		for (i = 0; i < tokenc; i++) {
			free(tokenv[i]);
		}
		free(tokenv);
	} else {
		/*
		 * shrink the vector to the size actually used; if that fails
		 * the original, larger allocation is still valid and is
		 * handed over instead of losing it and storing NULL
		 */
		shrunk = realloc(tokenv, (tokenc + 1) * sizeof (char *));
		*tokencp = tokenc;
		*tokenvp = (shrunk != NULL) ? shrunk : tokenv;
	}
	free(buf);
	if (retval < 0) {
		errno = saved_errno;
	}

	return (retval);
}