comparison tok.c @ 0:a7e41e1a79c8

Initial revision
author Guido Berhoerster <guido+pwm@berhoerster.name>
date Thu, 19 Jan 2017 22:39:51 +0100
parents
children 722a45b4028b
comparison
equal deleted inserted replaced
-1:000000000000 0:a7e41e1a79c8
1 /*
2 * Copyright (C) 2016 Guido Berhoerster <guido+pwm@berhoerster.name>
3 *
4 * Permission is hereby granted, free of charge, to any person obtaining
5 * a copy of this software and associated documentation files (the
6 * "Software"), to deal in the Software without restriction, including
7 * without limitation the rights to use, copy, modify, merge, publish,
8 * distribute, sublicense, and/or sell copies of the Software, and to
9 * permit persons to whom the Software is furnished to do so, subject to
10 * the following conditions:
11 *
12 * The above copyright notice and this permission notice shall be included
13 * in all copies or substantial portions of the Software.
14 *
15 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
16 * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
17 * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
18 * IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY
19 * CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
20 * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
21 * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
22 */
23
24 #include "compat.h"
25
26 #include <errno.h>
27 #include <stdio.h>
28 #include <stdlib.h>
29 #include <string.h>
30
31 #include "tok.h"
32
/*
 * States of the tokenizer state machine driven by tok_tokenize().
 */
enum tok_states {
	/* between tokens, skipping whitespace */
	STATE_INITIAL = 0,
	/* inside an unquoted part of a token */
	STATE_IN_WORD,
	/* inside a single- or double-quoted part of a token */
	STATE_IN_QUOTE,
	/* directly after a backslash in an unquoted part */
	STATE_IN_WORD_ESCAPE,
	/* directly after a backslash in a quoted part */
	STATE_IN_QUOTE_ESCAPE
};
40
/*
 * Append one character to a growable NUL-terminated string buffer, or reset
 * the buffer to the empty string.
 *
 * bufp       points to the buffer pointer; *bufp may be NULL when
 *            *buf_sizep is 0, in which case the buffer is allocated here
 * buf_sizep  points to the allocated size of *bufp in bytes
 * c          character to append if non-negative; any negative value
 *            truncates the buffer to the empty string instead
 *
 * The buffer grows to the larger of twice its current size and BUFSIZ
 * whenever the result including the terminating NUL would not fit.
 *
 * Returns 0 on success and -1 if realloc() fails; on failure *bufp and
 * *buf_sizep are left unmodified.
 */
static inline int
strbuf_appendc(char **bufp, size_t *buf_sizep, int c)
{
	int appending = (c >= 0);
	char *str = *bufp;
	size_t capacity = *buf_sizep;
	size_t used = 0;

	/* the current length only matters when something is appended */
	if (appending && (str != NULL)) {
		used = strlen(str);
	}

	/* make room for the optional new character plus the NUL byte */
	if (capacity < used + appending + 1) {
		size_t doubled = capacity * 2;
		char *grown;

		capacity = (doubled > BUFSIZ) ? doubled : BUFSIZ;
		grown = realloc(str, capacity);
		if (grown == NULL) {
			return (-1);
		}
		str = grown;
	}

	if (appending) {
		str[used] = c;
		used++;
	}
	str[used] = '\0';

	*buf_sizep = capacity;
	*bufp = str;

	return (0);
}
70
71 enum tok_err
72 tok_tokenize(const char *s, int *tokencp, char ***tokenvp)
73 {
74 int retval = TOK_ERR_SYSTEM_ERROR;
75 int saved_errno = 0;
76 char **tokenv;
77 size_t tokenc = 0;
78 const char *p = s;
79 enum tok_states state = STATE_INITIAL;
80 char quote;
81 char *buf = NULL;
82 size_t buf_size = 0;
83 char *token;
84 size_t i;
85
86 /*
87 * allocate maximum number of tokens including the terminating NULL
88 * pointer: ceil(length / 2) + 1
89 */
90 tokenv = malloc(((strlen(s) + 2 - 1) / 2 + 1) * sizeof (char *));
91 if (tokenv == NULL) {
92 saved_errno = errno;
93 goto out;
94 }
95 tokenv[0] = NULL;
96
97 for (;;) {
98 switch (state) {
99 case STATE_INITIAL:
100 switch (*p) {
101 case ' ': /* FALLTHROUGH */
102 case '\t': /* FALLTHROUGH */
103 case '\n':
104 /* skip initial whitespace */
105 break;
106 case '"': /* FALLTHROUGH */
107 case '\'':
108 /* start quoted part of token */
109 state = STATE_IN_QUOTE;
110 quote = *p;
111 if (strbuf_appendc(&buf, &buf_size, -1) != 0) {
112 saved_errno = errno;
113 goto out;
114 }
115 break;
116 case '\\':
117 /* start token with a backslash escape */
118 state = STATE_IN_WORD_ESCAPE;
119 if (strbuf_appendc(&buf, &buf_size, -1) != 0) {
120 saved_errno = errno;
121 goto out;
122 }
123 break;
124 case '\0':
125 /* end of input */
126 retval = 0;
127 goto out;
128 default:
129 /* start token with a word */
130 state = STATE_IN_WORD;
131 if (strbuf_appendc(&buf, &buf_size, -1) != 0) {
132 saved_errno = errno;
133 goto out;
134 }
135 if (strbuf_appendc(&buf, &buf_size, *p) != 0) {
136 saved_errno = errno;
137 goto out;
138 }
139 }
140 break;
141 case STATE_IN_WORD:
142 switch (*p) {
143 case ' ': /* FALLTHROUGH */
144 case '\t': /* FALLTHROUGH */
145 case '\n': /* FALLTHROUGH */
146 case '\0':
147 /* end of token */
148 token = strdup(buf);
149 if (token == NULL) {
150 saved_errno = errno;
151 goto out;
152 }
153 tokenv[tokenc++] = token;
154 tokenv[tokenc] = NULL;
155 if (*p == '\0') {
156 retval = 0;
157 goto out;
158 }
159 state = STATE_INITIAL;
160 break;
161 case '"': /* FALLTHROUGH */
162 case '\'':
163 /* start quoted part of token */
164 state = STATE_IN_QUOTE;
165 quote = *p;
166 break;
167 case '\\':
168 /* start backslash escape */
169 state = STATE_IN_WORD_ESCAPE;
170 break;
171 default:
172 /* regular character */
173 if (strbuf_appendc(&buf, &buf_size, *p) != 0) {
174 saved_errno = errno;
175 goto out;
176 }
177 }
178 break;
179 case STATE_IN_QUOTE:
180 switch (*p) {
181 case '"': /* FALLTHROUGH */
182 case '\'':
183 if (*p == quote) {
184 /* end quoted part of token */
185 state = STATE_IN_WORD;
186 } else {
187 /* quote quote character */
188 if (strbuf_appendc(&buf, &buf_size,
189 *p) != 0) {
190 saved_errno = errno;
191 goto out;
192 }
193 }
194 break;
195 case '\\':
196 /* start quoted backslash escape */
197 state = STATE_IN_QUOTE_ESCAPE;
198 break;
199 case '\0':
200 /* unclosed quote */
201 retval = TOK_ERR_UNTERMINATED_QUOTE;
202 goto out;
203 default:
204 /* regular character */
205 if (strbuf_appendc(&buf, &buf_size, *p) != 0) {
206 saved_errno = errno;
207 goto out;
208 }
209 }
210 break;
211 case STATE_IN_WORD_ESCAPE: /* FALLTHROUGH */
212 case STATE_IN_QUOTE_ESCAPE:
213 if (*p == '\0') {
214 /* trailing backslash */
215 retval = TOK_ERR_TRAILING_BACKSLASH;
216 goto out;
217 }
218 /* escaped character */
219 state = (state == STATE_IN_WORD_ESCAPE) ?
220 STATE_IN_WORD : STATE_IN_QUOTE;
221 if (strbuf_appendc(&buf, &buf_size, *p) != 0) {
222 saved_errno = errno;
223 goto out;
224 }
225 break;
226 }
227 p++;
228 }
229
230 out:
231 if (retval < 0) {
232 for (i = 0; i < tokenc; i++) {
233 free(tokenv[i]);
234 }
235 free(tokenv);
236 } else {
237 *tokencp = tokenc;
238 *tokenvp = realloc(tokenv, (tokenc + 1) * sizeof (char *));
239 }
240 free(buf);
241 if (retval < 0) {
242 errno = saved_errno;
243 }
244
245 return (retval);
246 }