Mercurial > projects > pwm
comparison tok.c @ 0:a7e41e1a79c8
Initial revision
author | Guido Berhoerster <guido+pwm@berhoerster.name> |
---|---|
date | Thu, 19 Jan 2017 22:39:51 +0100 |
parents | |
children | 722a45b4028b |
comparison
equal
deleted
inserted
replaced
-1:000000000000 | 0:a7e41e1a79c8 |
---|---|
1 /* | |
2 * Copyright (C) 2016 Guido Berhoerster <guido+pwm@berhoerster.name> | |
3 * | |
4 * Permission is hereby granted, free of charge, to any person obtaining | |
5 * a copy of this software and associated documentation files (the | |
6 * "Software"), to deal in the Software without restriction, including | |
7 * without limitation the rights to use, copy, modify, merge, publish, | |
8 * distribute, sublicense, and/or sell copies of the Software, and to | |
9 * permit persons to whom the Software is furnished to do so, subject to | |
10 * the following conditions: | |
11 * | |
12 * The above copyright notice and this permission notice shall be included | |
13 * in all copies or substantial portions of the Software. | |
14 * | |
15 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, | |
16 * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF | |
17 * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. | |
18 * IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY | |
19 * CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, | |
20 * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE | |
21 * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. | |
22 */ | |
23 | |
24 #include "compat.h" | |
25 | |
26 #include <errno.h> | |
27 #include <stdio.h> | |
28 #include <stdlib.h> | |
29 #include <string.h> | |
30 | |
31 #include "tok.h" | |
32 | |
/*
 * States of the tokenizer state machine driven by tok_tokenize().
 */
enum tok_states {
	STATE_INITIAL = 0,	/* between tokens, skipping whitespace */
	STATE_IN_WORD,		/* inside an unquoted part of a token */
	STATE_IN_QUOTE,		/* inside a quoted part of a token */
	STATE_IN_WORD_ESCAPE,	/* directly after a backslash in a word */
	STATE_IN_QUOTE_ESCAPE	/* directly after a backslash in a quote */
};
40 | |
/*
 * Append one character to a growable, NUL-terminated string buffer, or
 * reset the buffer to the empty string.
 *
 * bufp      - in/out: address of the buffer pointer; *bufp may be NULL
 *             (with *buf_sizep == 0), in which case a buffer is allocated
 * buf_sizep - in/out: address of the allocated size of *bufp in bytes
 * c         - character to append, or -1 to truncate the buffer to ""
 *
 * Only the value -1 requests a reset.  Every other value is stored as a
 * char, so a high-bit byte that arrives sign-extended from a plain
 * (signed) char is appended rather than silently wiping the buffer.
 * Callers should nevertheless pass characters converted to unsigned char,
 * because the byte 0xff sign-extends to exactly -1.
 *
 * Returns 0 on success.  Returns -1 if the buffer could not be grown; in
 * that case *bufp and *buf_sizep are left untouched (the old buffer is
 * still valid) and errno is whatever realloc() left behind.
 */
static inline int
strbuf_appendc(char **bufp, size_t *buf_sizep, int c)
{
	char	*buf = *bufp;
	size_t	buf_size = *buf_sizep;
	int	reset = (c == -1);
	size_t	len;
	size_t	needed;

	/* a reset discards the current contents, so their length is moot */
	len = ((buf != NULL) && !reset) ? strlen(buf) : 0;

	/* current contents + the new character (if any) + terminating NUL */
	needed = len + (reset ? 0 : 1) + 1;

	/* allocate buffer if *bufp is NULL and *buf_sizep is 0, or grow it */
	if (buf_size < needed) {
		buf_size = (buf_size * 2 > (size_t)BUFSIZ) ? buf_size * 2 :
		    (size_t)BUFSIZ;
		buf = realloc(buf, buf_size);
		if (buf == NULL) {
			return (-1);
		}
	}

	/* append character to string buffer or reset buffer if c is -1 */
	if (!reset) {
		buf[len++] = (char)c;
	}
	buf[len] = '\0';

	*bufp = buf;
	*buf_sizep = buf_size;

	return (0);
}
70 | |
71 enum tok_err | |
72 tok_tokenize(const char *s, int *tokencp, char ***tokenvp) | |
73 { | |
74 int retval = TOK_ERR_SYSTEM_ERROR; | |
75 int saved_errno = 0; | |
76 char **tokenv; | |
77 size_t tokenc = 0; | |
78 const char *p = s; | |
79 enum tok_states state = STATE_INITIAL; | |
80 char quote; | |
81 char *buf = NULL; | |
82 size_t buf_size = 0; | |
83 char *token; | |
84 size_t i; | |
85 | |
86 /* | |
87 * allocate maximum number of tokens including the terminating NULL | |
88 * pointer: ceil(length / 2) + 1 | |
89 */ | |
90 tokenv = malloc(((strlen(s) + 2 - 1) / 2 + 1) * sizeof (char *)); | |
91 if (tokenv == NULL) { | |
92 saved_errno = errno; | |
93 goto out; | |
94 } | |
95 tokenv[0] = NULL; | |
96 | |
97 for (;;) { | |
98 switch (state) { | |
99 case STATE_INITIAL: | |
100 switch (*p) { | |
101 case ' ': /* FALLTHROUGH */ | |
102 case '\t': /* FALLTHROUGH */ | |
103 case '\n': | |
104 /* skip initial whitespace */ | |
105 break; | |
106 case '"': /* FALLTHROUGH */ | |
107 case '\'': | |
108 /* start quoted part of token */ | |
109 state = STATE_IN_QUOTE; | |
110 quote = *p; | |
111 if (strbuf_appendc(&buf, &buf_size, -1) != 0) { | |
112 saved_errno = errno; | |
113 goto out; | |
114 } | |
115 break; | |
116 case '\\': | |
117 /* start token with a backslash escape */ | |
118 state = STATE_IN_WORD_ESCAPE; | |
119 if (strbuf_appendc(&buf, &buf_size, -1) != 0) { | |
120 saved_errno = errno; | |
121 goto out; | |
122 } | |
123 break; | |
124 case '\0': | |
125 /* end of input */ | |
126 retval = 0; | |
127 goto out; | |
128 default: | |
129 /* start token with a word */ | |
130 state = STATE_IN_WORD; | |
131 if (strbuf_appendc(&buf, &buf_size, -1) != 0) { | |
132 saved_errno = errno; | |
133 goto out; | |
134 } | |
135 if (strbuf_appendc(&buf, &buf_size, *p) != 0) { | |
136 saved_errno = errno; | |
137 goto out; | |
138 } | |
139 } | |
140 break; | |
141 case STATE_IN_WORD: | |
142 switch (*p) { | |
143 case ' ': /* FALLTHROUGH */ | |
144 case '\t': /* FALLTHROUGH */ | |
145 case '\n': /* FALLTHROUGH */ | |
146 case '\0': | |
147 /* end of token */ | |
148 token = strdup(buf); | |
149 if (token == NULL) { | |
150 saved_errno = errno; | |
151 goto out; | |
152 } | |
153 tokenv[tokenc++] = token; | |
154 tokenv[tokenc] = NULL; | |
155 if (*p == '\0') { | |
156 retval = 0; | |
157 goto out; | |
158 } | |
159 state = STATE_INITIAL; | |
160 break; | |
161 case '"': /* FALLTHROUGH */ | |
162 case '\'': | |
163 /* start quoted part of token */ | |
164 state = STATE_IN_QUOTE; | |
165 quote = *p; | |
166 break; | |
167 case '\\': | |
168 /* start backslash escape */ | |
169 state = STATE_IN_WORD_ESCAPE; | |
170 break; | |
171 default: | |
172 /* regular character */ | |
173 if (strbuf_appendc(&buf, &buf_size, *p) != 0) { | |
174 saved_errno = errno; | |
175 goto out; | |
176 } | |
177 } | |
178 break; | |
179 case STATE_IN_QUOTE: | |
180 switch (*p) { | |
181 case '"': /* FALLTHROUGH */ | |
182 case '\'': | |
183 if (*p == quote) { | |
184 /* end quoted part of token */ | |
185 state = STATE_IN_WORD; | |
186 } else { | |
187 /* quote quote character */ | |
188 if (strbuf_appendc(&buf, &buf_size, | |
189 *p) != 0) { | |
190 saved_errno = errno; | |
191 goto out; | |
192 } | |
193 } | |
194 break; | |
195 case '\\': | |
196 /* start quoted backslash escape */ | |
197 state = STATE_IN_QUOTE_ESCAPE; | |
198 break; | |
199 case '\0': | |
200 /* unclosed quote */ | |
201 retval = TOK_ERR_UNTERMINATED_QUOTE; | |
202 goto out; | |
203 default: | |
204 /* regular character */ | |
205 if (strbuf_appendc(&buf, &buf_size, *p) != 0) { | |
206 saved_errno = errno; | |
207 goto out; | |
208 } | |
209 } | |
210 break; | |
211 case STATE_IN_WORD_ESCAPE: /* FALLTHROUGH */ | |
212 case STATE_IN_QUOTE_ESCAPE: | |
213 if (*p == '\0') { | |
214 /* trailing backslash */ | |
215 retval = TOK_ERR_TRAILING_BACKSLASH; | |
216 goto out; | |
217 } | |
218 /* escaped character */ | |
219 state = (state == STATE_IN_WORD_ESCAPE) ? | |
220 STATE_IN_WORD : STATE_IN_QUOTE; | |
221 if (strbuf_appendc(&buf, &buf_size, *p) != 0) { | |
222 saved_errno = errno; | |
223 goto out; | |
224 } | |
225 break; | |
226 } | |
227 p++; | |
228 } | |
229 | |
230 out: | |
231 if (retval < 0) { | |
232 for (i = 0; i < tokenc; i++) { | |
233 free(tokenv[i]); | |
234 } | |
235 free(tokenv); | |
236 } else { | |
237 *tokencp = tokenc; | |
238 *tokenvp = realloc(tokenv, (tokenc + 1) * sizeof (char *)); | |
239 } | |
240 free(buf); | |
241 if (retval < 0) { | |
242 errno = saved_errno; | |
243 } | |
244 | |
245 return (retval); | |
246 } |