view tok.c @ 42:fb995e5d54e9 version-1

Release version 1
author Guido Berhoerster <guido+pwm@berhoerster.name>
date Tue, 20 Aug 2019 21:26:55 +0200
parents 722a45b4028b
children
line wrap: on
line source

/*
 * Copyright (C) 2017 Guido Berhoerster <guido+pwm@berhoerster.name>
 *
 * Permission is hereby granted, free of charge, to any person obtaining
 * a copy of this software and associated documentation files (the
 * "Software"), to deal in the Software without restriction, including
 * without limitation the rights to use, copy, modify, merge, publish,
 * distribute, sublicense, and/or sell copies of the Software, and to
 * permit persons to whom the Software is furnished to do so, subject to
 * the following conditions:
 *
 * The above copyright notice and this permission notice shall be included
 * in all copies or substantial portions of the Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
 * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
 * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
 * IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY
 * CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
 * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
 * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
 */

#include "compat.h"

#include <ctype.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>

#include "tok.h"
#include "util.h"

/*
 * States of the tokenizer state machine driven by tok_tokenize(); exactly one
 * input character is consumed per state transition.
 */
enum tok_states {
	STATE_INITIAL,		/* between tokens, skipping whitespace */
	STATE_IN_WORD,		/* inside the unquoted part of an argument */
	STATE_IN_QUOTE,		/* inside a single- or double-quoted part */
	STATE_IN_WORD_ESCAPE,	/* directly after a backslash outside quotes */
	STATE_IN_QUOTE_ESCAPE,	/* directly after a backslash inside quotes */
	STATE_IN_MACRO		/* after a leading '$', reading a macro name */
};

/*
 * Append a single character to a growable, NUL-terminated string buffer, or
 * reset the buffer to the empty string.
 *
 * bufp		address of the buffer pointer; *bufp may be NULL as long as
 *		*buf_sizep is 0, in which case the buffer is allocated here
 * buf_sizep	address of the allocated size of *bufp in bytes
 * c		character to append if non-negative; any negative value
 *		truncates the buffer to "" instead of appending
 *
 * Both *bufp and *buf_sizep are updated since growing the buffer may move it.
 */
static inline void
strbuf_appendc(char **bufp, size_t *buf_sizep, int c)
{
	char	*str = *bufp;
	size_t	capacity = *buf_sizep;
	size_t	used = 0;
	size_t	needed;

	/* a reset always starts from offset 0, so the length is irrelevant */
	if ((c >= 0) && (str != NULL)) {
		used = strlen(str);
	}

	/* room for the current contents, the terminator and the new character */
	needed = used + 1;
	if (c >= 0) {
		needed++;
	}

	if (capacity < needed) {
		/* double the buffer but never allocate less than BUFSIZ */
		capacity *= 2;
		if (!(capacity > BUFSIZ)) {
			capacity = BUFSIZ;
		}
		str = xrealloc(str, capacity);
	}

	if (c >= 0) {
		str[used] = c;
		used++;
	}
	str[used] = '\0';

	*buf_sizep = capacity;
	*bufp = str;
}

/*
 * Release a NULL-terminated token vector together with every token in it and
 * the string owned by each token (the name of a TOK_MACRO token, the value of
 * a TOK_ARG token). Passing NULL is a no-op.
 */
void
tok_free(union tok **tokenv)
{
	union tok	**cur;
	union tok	*token;

	if (tokenv == NULL) {
		return;
	}

	/* walk the vector up to its NULL terminator */
	for (cur = tokenv; *cur != NULL; cur++) {
		token = *cur;
		if (token->any.type == TOK_MACRO) {
			free(token->macro.name);
		} else if (token->any.type == TOK_ARG) {
			free(token->arg.value);
		}
		free(token);
	}
	free(tokenv);
}

/*
 * Split the NUL-terminated string s into tokens using shell-like rules.
 *
 * Tokens are separated by spaces, tabs and newlines. There are two kinds:
 *   - argument tokens (TOK_ARG): any mix of unquoted text and single- or
 *     double-quoted parts, which are concatenated into one value. Inside
 *     quotes the other quote character is literal. A backslash, inside or
 *     outside quotes, makes the following character literal.
 *   - macro tokens (TOK_MACRO): a token starting with '$'; the remainder up to
 *     the next whitespace or the end of input is the macro name and may only
 *     consist of ASCII alphanumeric characters and underscores.
 *
 * s		input string
 * tokencp	on success receives the number of tokens
 * tokenvp	on success receives a newly allocated, NULL-terminated vector
 *		of tokens which the caller must release with tok_free()
 *
 * Returns TOK_ERR_OK on success. On failure returns
 * TOK_ERR_UNTERMINATED_QUOTE, TOK_ERR_TRAILING_BACKSLASH or
 * TOK_ERR_INVALID_MACRO_NAME, frees everything allocated so far and leaves
 * *tokencp and *tokenvp untouched.
 */
enum tok_err
tok_tokenize(const char *s, size_t *tokencp, union tok ***tokenvp)
{
	int		retval = TOK_ERR_OK;
	union tok	**tokenv;
	size_t		tokenc = 0;
	const char	*p = s;
	enum tok_states	state = STATE_INITIAL;
	int		quote = '\0';
	int		c;
	char		*buf = NULL;
	size_t		buf_size = 0;
	char		*value;
	char		*name;

	/*
	 * n tokens need at least n characters plus n - 1 separators, so there
	 * are at most ceil(length / 2) tokens; one more slot is reserved for
	 * the terminating NULL pointer
	 */
	tokenv = xmalloc((((strlen(s) + 2 - 1) / 2) + 1) *
	    sizeof (union tok *));

	for (;;) {
		/*
		 * convert via unsigned char so that bytes >= 0x80 never turn
		 * into negative values, which strbuf_appendc() would interpret
		 * as a request to reset the buffer
		 */
		c = (unsigned char)*p;

		switch (state) {
		case STATE_INITIAL:
			switch (c) {
			case ' ':	/* FALLTHROUGH */
			case '\t':	/* FALLTHROUGH */
			case '\n':
				/* skip initial whitespace */
				break;
			case '"':	/* FALLTHROUGH */
			case '\'':
				/* start quoted part of token */
				state = STATE_IN_QUOTE;
				quote = c;
				strbuf_appendc(&buf, &buf_size, -1);
				break;
			case '\\':
				/* start token with a backslash escape */
				state = STATE_IN_WORD_ESCAPE;
				strbuf_appendc(&buf, &buf_size, -1);
				break;
			case '$':
				/* start macro token */
				state = STATE_IN_MACRO;
				strbuf_appendc(&buf, &buf_size, -1);
				break;
			case '\0':
				/* end of input */
				retval = TOK_ERR_OK;
				goto out;
			default:
				/* start token with a word */
				state = STATE_IN_WORD;
				strbuf_appendc(&buf, &buf_size, -1);
				strbuf_appendc(&buf, &buf_size, c);
				break;
			}
			break;
		case STATE_IN_WORD:
			switch (c) {
			case ' ':	/* FALLTHROUGH */
			case '\t':	/* FALLTHROUGH */
			case '\n':	/* FALLTHROUGH */
			case '\0':
				/* end of token */
				value = xstrdup(buf);
				tokenv[tokenc] = xmalloc(sizeof (union tok));
				tokenv[tokenc]->arg.type = TOK_ARG;
				tokenv[tokenc]->arg.value = value;
				tokenc++;

				if (c == '\0') {
					retval = TOK_ERR_OK;
					goto out;
				}
				state = STATE_INITIAL;
				break;
			case '"':	/* FALLTHROUGH */
			case '\'':
				/* start quoted part of token */
				state = STATE_IN_QUOTE;
				quote = c;
				break;
			case '\\':
				/* start backslash escape */
				state = STATE_IN_WORD_ESCAPE;
				break;
			default:
				/* regular character */
				strbuf_appendc(&buf, &buf_size, c);
				break;
			}
			break;
		case STATE_IN_QUOTE:
			switch (c) {
			case '"':	/* FALLTHROUGH */
			case '\'':
				if (c == quote) {
					/* end quoted part of token */
					state = STATE_IN_WORD;
				} else {
					/* quote quote character */
					strbuf_appendc(&buf, &buf_size, c);
				}
				break;
			case '\\':
				/* start quoted backslash escape */
				state = STATE_IN_QUOTE_ESCAPE;
				break;
			case '\0':
				/* unclosed quote */
				retval = TOK_ERR_UNTERMINATED_QUOTE;
				goto out;
			default:
				/* regular character */
				strbuf_appendc(&buf, &buf_size, c);
				break;
			}
			break;
		case STATE_IN_WORD_ESCAPE:	/* FALLTHROUGH */
		case STATE_IN_QUOTE_ESCAPE:
			if (c == '\0') {
				/* trailing backslash */
				retval = TOK_ERR_TRAILING_BACKSLASH;
				goto out;
			}
			/* escaped character, return to the enclosing state */
			state = (state == STATE_IN_WORD_ESCAPE) ?
			    STATE_IN_WORD : STATE_IN_QUOTE;
			strbuf_appendc(&buf, &buf_size, c);
			break;
		case STATE_IN_MACRO:
			switch (c) {
			case ' ':	/* FALLTHROUGH */
			case '\t':	/* FALLTHROUGH */
			case '\n':	/* FALLTHROUGH */
			case '\0':
				/* end of token */
				name = xstrdup(buf);
				tokenv[tokenc] = xmalloc(sizeof (union tok));
				tokenv[tokenc]->macro.type = TOK_MACRO;
				tokenv[tokenc]->macro.name = name;
				tokenc++;

				if (c == '\0') {
					retval = TOK_ERR_OK;
					goto out;
				}
				state = STATE_INITIAL;
				break;
			default:
				/*
				 * macro names must only contain alphanumeric
				 * characters and underscores
				 */
				if (!isascii(c) || (!isalnum(c) &&
				    (c != '_'))) {
					retval = TOK_ERR_INVALID_MACRO_NAME;
					goto out;
				}
				strbuf_appendc(&buf, &buf_size, c);
				break;
			}
			break;
		}
		p++;
	}

out:
	/*
	 * terminate the vector on every path: tok_free() relies on the NULL
	 * sentinel and the slots past tokenc are uninitialized memory
	 */
	tokenv[tokenc] = NULL;
	if (retval < 0) {
		tok_free(tokenv);
	} else {
		*tokencp = tokenc;
		/* shrink the vector to the number of tokens actually found */
		*tokenvp = xrealloc(tokenv, (tokenc + 1) *
		    sizeof (union tok *));
	}
	free(buf);

	return (retval);
}