diff tok.c @ 0:a7e41e1a79c8

Initial revision
author Guido Berhoerster <guido+pwm@berhoerster.name>
date Thu, 19 Jan 2017 22:39:51 +0100
parents
children 722a45b4028b
line wrap: on
line diff
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/tok.c	Thu Jan 19 22:39:51 2017 +0100
@@ -0,0 +1,246 @@
+/*
+ * Copyright (C) 2016 Guido Berhoerster <guido+pwm@berhoerster.name>
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining
+ * a copy of this software and associated documentation files (the
+ * "Software"), to deal in the Software without restriction, including
+ * without limitation the rights to use, copy, modify, merge, publish,
+ * distribute, sublicense, and/or sell copies of the Software, and to
+ * permit persons to whom the Software is furnished to do so, subject to
+ * the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included
+ * in all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
+ * IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY
+ * CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
+ * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
+ * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+ */
+
+#include "compat.h"
+
+#include <errno.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+
+#include "tok.h"
+
+/*
+ * States of the tokenizer state machine driven by tok_tokenize().  The
+ * numeric values are spelled out but only need to be distinct.
+ */
+enum tok_states {
+	STATE_INITIAL = 0,		/* between tokens */
+	STATE_IN_WORD = 1,		/* in an unquoted part of a token */
+	STATE_IN_QUOTE = 2,		/* in a quoted part of a token */
+	STATE_IN_WORD_ESCAPE = 3,	/* after a backslash in a word */
+	STATE_IN_QUOTE_ESCAPE = 4	/* after a backslash inside quotes */
+};
+
+/*
+ * Append the character c to the NUL-terminated string in *bufp, or reset
+ * that string to the empty string if c is negative.
+ *
+ * *bufp may be NULL with *buf_sizep being 0, in which case a buffer is
+ * allocated.  The buffer is enlarged with realloc() whenever it is too small
+ * to hold the result; the new size is twice the old size but at least BUFSIZ
+ * bytes.  On success 0 is returned and *bufp and *buf_sizep are updated.  If
+ * realloc() fails -1 is returned and *bufp and *buf_sizep are left untouched.
+ */
+static inline int
+strbuf_appendc(char **bufp, size_t *buf_sizep, int c)
+{
+	int	appending = (c >= 0);
+	char	*str = *bufp;
+	size_t	capacity = *buf_sizep;
+	size_t	used = 0;
+	size_t	needed;
+
+	/* a reset starts over at offset 0, so the old length is irrelevant */
+	if (appending && (str != NULL)) {
+		used = strlen(str);
+	}
+
+	/* current contents, the optional new character and the final NUL */
+	needed = used + (appending ? 1 : 0) + 1;
+	if (capacity < needed) {
+		if (capacity * 2 > BUFSIZ) {
+			capacity *= 2;
+		} else {
+			capacity = BUFSIZ;
+		}
+		str = realloc(str, capacity);
+		if (str == NULL) {
+			return (-1);
+		}
+	}
+
+	if (appending) {
+		str[used] = c;
+		used++;
+	}
+	str[used] = '\0';
+
+	*buf_sizep = capacity;
+	*bufp = str;
+
+	return (0);
+}
+
+/*
+ * Split the string s into tokens using shell-like quoting rules.
+ *
+ * Tokens are separated by spaces, tabs and newlines.  Single or double
+ * quotes group characters, including whitespace and the respective other
+ * quote character, into the current token.  A backslash makes the character
+ * following it literal, both inside and outside of quotes.
+ *
+ * On success 0 is returned, *tokencp is set to the number of tokens and
+ * *tokenvp to a NULL-terminated array of token strings.  The array and each
+ * string are allocated with malloc()/realloc()/strdup().  An unclosed quote
+ * yields TOK_ERR_UNTERMINATED_QUOTE, a backslash at the end of the input
+ * TOK_ERR_TRAILING_BACKSLASH and a failed allocation TOK_ERR_SYSTEM_ERROR.
+ *
+ * NOTE(review): the cleanup at "out" keys off "retval < 0".  It releases all
+ * memory and restores errno for error returns only if the TOK_ERR_*
+ * constants are negative; for any non-negative return value the output
+ * parameters are written.  tok.h is not visible here -- confirm.
+ */
+enum tok_err
+tok_tokenize(const char *s, int *tokencp, char ***tokenvp)
+{
+	int		retval = TOK_ERR_SYSTEM_ERROR;
+	int		saved_errno = 0;
+	char		**tokenv;
+	char		**shrunk;
+	size_t		tokenc = 0;
+	const char	*p = s;
+	enum tok_states	state = STATE_INITIAL;
+	char		quote = '\0';
+	char		*buf = NULL;
+	size_t		buf_size = 0;
+	char		*token;
+	size_t		i;
+	int		c;
+
+	/*
+	 * allocate maximum number of tokens including the terminating NULL
+	 * pointer: ceil(length / 2) + 1
+	 */
+	tokenv = malloc(((strlen(s) + 2 - 1) / 2 + 1) * sizeof (char *));
+	if (tokenv == NULL) {
+		saved_errno = errno;
+		goto out;
+	}
+	tokenv[0] = NULL;
+
+	for (;;) {
+		/*
+		 * strbuf_appendc() takes any negative value as a request to
+		 * reset the buffer.  Where char is signed, bytes >= 0x80 would
+		 * be negative if passed as plain char, so they are converted
+		 * via unsigned char before being appended.
+		 */
+		c = (unsigned char)*p;
+
+		switch (state) {
+		case STATE_INITIAL:
+			switch (*p) {
+			case ' ':	/* FALLTHROUGH */
+			case '\t':	/* FALLTHROUGH */
+			case '\n':
+				/* skip initial whitespace */
+				break;
+			case '"':	/* FALLTHROUGH */
+			case '\'':
+				/* start quoted part of token */
+				state = STATE_IN_QUOTE;
+				quote = *p;
+				if (strbuf_appendc(&buf, &buf_size, -1) != 0) {
+					saved_errno = errno;
+					goto out;
+				}
+				break;
+			case '\\':
+				/* start token with a backslash escape */
+				state = STATE_IN_WORD_ESCAPE;
+				if (strbuf_appendc(&buf, &buf_size, -1) != 0) {
+					saved_errno = errno;
+					goto out;
+				}
+				break;
+			case '\0':
+				/* end of input */
+				retval = 0;
+				goto out;
+			default:
+				/* start token with a word */
+				state = STATE_IN_WORD;
+				if (strbuf_appendc(&buf, &buf_size, -1) != 0) {
+					saved_errno = errno;
+					goto out;
+				}
+				if (strbuf_appendc(&buf, &buf_size, c) != 0) {
+					saved_errno = errno;
+					goto out;
+				}
+			}
+			break;
+		case STATE_IN_WORD:
+			switch (*p) {
+			case ' ':	/* FALLTHROUGH */
+			case '\t':	/* FALLTHROUGH */
+			case '\n':	/* FALLTHROUGH */
+			case '\0':
+				/* end of token */
+				token = strdup(buf);
+				if (token == NULL) {
+					saved_errno = errno;
+					goto out;
+				}
+				tokenv[tokenc++] = token;
+				tokenv[tokenc] = NULL;
+				if (*p == '\0') {
+					retval = 0;
+					goto out;
+				}
+				state = STATE_INITIAL;
+				break;
+			case '"':	/* FALLTHROUGH */
+			case '\'':
+				/* start quoted part of token */
+				state = STATE_IN_QUOTE;
+				quote = *p;
+				break;
+			case '\\':
+				/* start backslash escape */
+				state = STATE_IN_WORD_ESCAPE;
+				break;
+			default:
+				/* regular character */
+				if (strbuf_appendc(&buf, &buf_size, c) != 0) {
+					saved_errno = errno;
+					goto out;
+				}
+			}
+			break;
+		case STATE_IN_QUOTE:
+			switch (*p) {
+			case '"':	/* FALLTHROUGH */
+			case '\'':
+				if (*p == quote) {
+					/* end quoted part of token */
+					state = STATE_IN_WORD;
+				} else {
+					/* the other quote character is literal */
+					if (strbuf_appendc(&buf, &buf_size,
+					    c) != 0) {
+						saved_errno = errno;
+						goto out;
+					}
+				}
+				break;
+			case '\\':
+				/* start quoted backslash escape */
+				state = STATE_IN_QUOTE_ESCAPE;
+				break;
+			case '\0':
+				/* unclosed quote */
+				retval = TOK_ERR_UNTERMINATED_QUOTE;
+				goto out;
+			default:
+				/* regular character */
+				if (strbuf_appendc(&buf, &buf_size, c) != 0) {
+					saved_errno = errno;
+					goto out;
+				}
+			}
+			break;
+		case STATE_IN_WORD_ESCAPE:	/* FALLTHROUGH */
+		case STATE_IN_QUOTE_ESCAPE:
+			if (*p == '\0') {
+				/* trailing backslash */
+				retval = TOK_ERR_TRAILING_BACKSLASH;
+				goto out;
+			}
+			/* escaped character, return to the enclosing state */
+			state = (state == STATE_IN_WORD_ESCAPE) ?
+			    STATE_IN_WORD : STATE_IN_QUOTE;
+			if (strbuf_appendc(&buf, &buf_size, c) != 0) {
+				saved_errno = errno;
+				goto out;
+			}
+			break;
+		}
+		p++;
+	}
+
+out:
+	if (retval < 0) {
+		for (i = 0; i < tokenc; i++) {
+			free(tokenv[i]);
+		}
+		free(tokenv);
+	} else {
+		*tokencp = tokenc;
+		/*
+		 * Trim the array to its used size.  A failed realloc() leaves
+		 * the original array intact, so hand that one out rather than
+		 * leaking it and returning a NULL array.
+		 */
+		shrunk = realloc(tokenv, (tokenc + 1) * sizeof (char *));
+		*tokenvp = (shrunk != NULL) ? shrunk : tokenv;
+	}
+	free(buf);
+	if (retval < 0) {
+		/* free() above may have clobbered errno */
+		errno = saved_errno;
+	}
+
+	return (retval);
+}