comparison tok.c @ 27:722a45b4028b

Add define command for defining macros. Macros are parsed when they are defined with the D command and can subsequently be used as arguments for other commands. Handle out of memory errors directly in tok.c.
author Guido Berhoerster <guido+pwm@berhoerster.name>
date Mon, 25 Sep 2017 21:21:25 +0200
parents a7e41e1a79c8
children
comparison
equal deleted inserted replaced
26:5bdea77d0c1d 27:722a45b4028b
1 /* 1 /*
2 * Copyright (C) 2016 Guido Berhoerster <guido+pwm@berhoerster.name> 2 * Copyright (C) 2017 Guido Berhoerster <guido+pwm@berhoerster.name>
3 * 3 *
4 * Permission is hereby granted, free of charge, to any person obtaining 4 * Permission is hereby granted, free of charge, to any person obtaining
5 * a copy of this software and associated documentation files (the 5 * a copy of this software and associated documentation files (the
6 * "Software"), to deal in the Software without restriction, including 6 * "Software"), to deal in the Software without restriction, including
7 * without limitation the rights to use, copy, modify, merge, publish, 7 * without limitation the rights to use, copy, modify, merge, publish,
21 * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. 21 * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
22 */ 22 */
23 23
24 #include "compat.h" 24 #include "compat.h"
25 25
26 #include <errno.h> 26 #include <ctype.h>
27 #include <stdio.h> 27 #include <stdio.h>
28 #include <stdlib.h> 28 #include <stdlib.h>
29 #include <string.h> 29 #include <string.h>
30 30
31 #include "tok.h" 31 #include "tok.h"
32 #include "util.h"
32 33
33 enum tok_states { 34 enum tok_states {
34 STATE_INITIAL, 35 STATE_INITIAL,
35 STATE_IN_WORD, 36 STATE_IN_WORD,
36 STATE_IN_QUOTE, 37 STATE_IN_QUOTE,
37 STATE_IN_WORD_ESCAPE, 38 STATE_IN_WORD_ESCAPE,
38 STATE_IN_QUOTE_ESCAPE 39 STATE_IN_QUOTE_ESCAPE,
40 STATE_IN_MACRO
39 }; 41 };
40 42
41 static inline int 43 static inline void
42 strbuf_appendc(char **bufp, size_t *buf_sizep, int c) 44 strbuf_appendc(char **bufp, size_t *buf_sizep, int c)
43 { 45 {
44 char *buf = *bufp; 46 char *buf = *bufp;
45 size_t buf_size = *buf_sizep; 47 size_t buf_size = *buf_sizep;
46 size_t len; 48 size_t len;
48 len = ((buf != NULL) && (c >= 0)) ? strlen(buf) : 0; 50 len = ((buf != NULL) && (c >= 0)) ? strlen(buf) : 0;
49 51
50 /* allocate buffer if *bufp is NULL and *buf_sizep is 0 */ 52 /* allocate buffer if *bufp is NULL and *buf_sizep is 0 */
51 if (buf_size < len + (c >= 0) + 1) { 53 if (buf_size < len + (c >= 0) + 1) {
52 buf_size = (buf_size * 2 > BUFSIZ) ? buf_size * 2 : BUFSIZ; 54 buf_size = (buf_size * 2 > BUFSIZ) ? buf_size * 2 : BUFSIZ;
53 buf = realloc(buf, buf_size); 55 buf = xrealloc(buf, buf_size);
54 if (buf == NULL) {
55 return (-1);
56 }
57 } 56 }
58 57
59 /* append character to string buffer or reset buffer if c is -1 */ 58 /* append character to string buffer or reset buffer if c is -1 */
60 if (c >= 0) { 59 if (c >= 0) {
61 buf[len++] = c; 60 buf[len++] = c;
62 } 61 }
63 buf[len] = '\0'; 62 buf[len] = '\0';
64 63
65 *bufp = buf; 64 *bufp = buf;
66 *buf_sizep = buf_size; 65 *buf_sizep = buf_size;
67
68 return (0);
69 } 66 }
70 67
68 void
69 tok_free(union tok **tokenv)
70 {
71 size_t i;
72
73 if (tokenv == NULL) {
74 return;
75 }
76
77 for (i = 0; tokenv[i] != NULL; i++) {
78 switch (tokenv[i]->any.type) {
79 case TOK_MACRO:
80 free(tokenv[i]->macro.name);
81 break;
82 case TOK_ARG:
83 free(tokenv[i]->arg.value);
84 break;
85 }
86 free(tokenv[i]);
87 }
88 free(tokenv);
89 }
90
71 enum tok_err 91 enum tok_err
72 tok_tokenize(const char *s, int *tokencp, char ***tokenvp) 92 tok_tokenize(const char *s, size_t *tokencp, union tok ***tokenvp)
73 { 93 {
74 int retval = TOK_ERR_SYSTEM_ERROR; 94 int retval = TOK_ERR_OK;
75 int saved_errno = 0; 95 union tok **tokenv;
76 char **tokenv;
77 size_t tokenc = 0; 96 size_t tokenc = 0;
78 const char *p = s; 97 const char *p = s;
79 enum tok_states state = STATE_INITIAL; 98 enum tok_states state = STATE_INITIAL;
80 char quote; 99 char quote;
81 char *buf = NULL; 100 char *buf = NULL;
82 size_t buf_size = 0; 101 size_t buf_size = 0;
83 char *token; 102 char *value;
84 size_t i; 103 char *name;
85 104
86 /* 105 /* allocate maximum number of tokens: ceil(length / 2) */
87 * allocate maximum number of tokens including the terminating NULL 106 tokenv = xmalloc((((strlen(s) + 2 - 1) / 2) + 1) *
88 * pointer: ceil(length / 2) + 1 107 sizeof (union tok *));
89 */
90 tokenv = malloc(((strlen(s) + 2 - 1) / 2 + 1) * sizeof (char *));
91 if (tokenv == NULL) {
92 saved_errno = errno;
93 goto out;
94 }
95 tokenv[0] = NULL;
96 108
97 for (;;) { 109 for (;;) {
98 switch (state) { 110 switch (state) {
99 case STATE_INITIAL: 111 case STATE_INITIAL:
100 switch (*p) { 112 switch (*p) {
106 case '"': /* FALLTHROUGH */ 118 case '"': /* FALLTHROUGH */
107 case '\'': 119 case '\'':
108 /* start quoted part of token */ 120 /* start quoted part of token */
109 state = STATE_IN_QUOTE; 121 state = STATE_IN_QUOTE;
110 quote = *p; 122 quote = *p;
111 if (strbuf_appendc(&buf, &buf_size, -1) != 0) { 123 strbuf_appendc(&buf, &buf_size, -1);
112 saved_errno = errno;
113 goto out;
114 }
115 break; 124 break;
116 case '\\': 125 case '\\':
117 /* start token with a backslash escape */ 126 /* start token with a backslash escape */
118 state = STATE_IN_WORD_ESCAPE; 127 state = STATE_IN_WORD_ESCAPE;
119 if (strbuf_appendc(&buf, &buf_size, -1) != 0) { 128 strbuf_appendc(&buf, &buf_size, -1);
120 saved_errno = errno; 129 break;
121 goto out; 130 case '$':
122 } 131 /* start macro token */
132 state = STATE_IN_MACRO;
133 strbuf_appendc(&buf, &buf_size, -1);
123 break; 134 break;
124 case '\0': 135 case '\0':
125 /* end of input */ 136 /* end of input */
126 retval = 0; 137 retval = TOK_ERR_OK;
127 goto out; 138 goto out;
128 default: 139 default:
129 /* start token with a word */ 140 /* start token with a word */
130 state = STATE_IN_WORD; 141 state = STATE_IN_WORD;
131 if (strbuf_appendc(&buf, &buf_size, -1) != 0) { 142 strbuf_appendc(&buf, &buf_size, -1);
132 saved_errno = errno; 143 strbuf_appendc(&buf, &buf_size, *p);
133 goto out;
134 }
135 if (strbuf_appendc(&buf, &buf_size, *p) != 0) {
136 saved_errno = errno;
137 goto out;
138 }
139 } 144 }
140 break; 145 break;
141 case STATE_IN_WORD: 146 case STATE_IN_WORD:
142 switch (*p) { 147 switch (*p) {
143 case ' ': /* FALLTHROUGH */ 148 case ' ': /* FALLTHROUGH */
144 case '\t': /* FALLTHROUGH */ 149 case '\t': /* FALLTHROUGH */
145 case '\n': /* FALLTHROUGH */ 150 case '\n': /* FALLTHROUGH */
146 case '\0': 151 case '\0':
147 /* end of token */ 152 /* end of token */
148 token = strdup(buf); 153 value = xstrdup(buf);
149 if (token == NULL) { 154 tokenv[tokenc] = xmalloc(sizeof (union tok));
150 saved_errno = errno; 155 tokenv[tokenc]->arg.type = TOK_ARG;
151 goto out; 156 tokenv[tokenc]->arg.value = value;
152 } 157 tokenc++;
153 tokenv[tokenc++] = token; 158
154 tokenv[tokenc] = NULL;
155 if (*p == '\0') { 159 if (*p == '\0') {
156 retval = 0; 160 retval = TOK_ERR_OK;
157 goto out; 161 goto out;
158 } 162 }
159 state = STATE_INITIAL; 163 state = STATE_INITIAL;
160 break; 164 break;
161 case '"': /* FALLTHROUGH */ 165 case '"': /* FALLTHROUGH */
168 /* start backslash escape */ 172 /* start backslash escape */
169 state = STATE_IN_WORD_ESCAPE; 173 state = STATE_IN_WORD_ESCAPE;
170 break; 174 break;
171 default: 175 default:
172 /* regular character */ 176 /* regular character */
173 if (strbuf_appendc(&buf, &buf_size, *p) != 0) { 177 strbuf_appendc(&buf, &buf_size, *p);
174 saved_errno = errno;
175 goto out;
176 }
177 } 178 }
178 break; 179 break;
179 case STATE_IN_QUOTE: 180 case STATE_IN_QUOTE:
180 switch (*p) { 181 switch (*p) {
181 case '"': /* FALLTHROUGH */ 182 case '"': /* FALLTHROUGH */
183 if (*p == quote) { 184 if (*p == quote) {
184 /* end quoted part of token */ 185 /* end quoted part of token */
185 state = STATE_IN_WORD; 186 state = STATE_IN_WORD;
186 } else { 187 } else {
187 /* quote quote character */ 188 /* quote quote character */
188 if (strbuf_appendc(&buf, &buf_size, 189 strbuf_appendc(&buf, &buf_size, *p);
189 *p) != 0) {
190 saved_errno = errno;
191 goto out;
192 }
193 } 190 }
194 break; 191 break;
195 case '\\': 192 case '\\':
196 /* start quoted backslash escape */ 193 /* start quoted backslash escape */
197 state = STATE_IN_QUOTE_ESCAPE; 194 state = STATE_IN_QUOTE_ESCAPE;
200 /* unclosed quote */ 197 /* unclosed quote */
201 retval = TOK_ERR_UNTERMINATED_QUOTE; 198 retval = TOK_ERR_UNTERMINATED_QUOTE;
202 goto out; 199 goto out;
203 default: 200 default:
204 /* regular character */ 201 /* regular character */
205 if (strbuf_appendc(&buf, &buf_size, *p) != 0) { 202 strbuf_appendc(&buf, &buf_size, *p);
206 saved_errno = errno;
207 goto out;
208 }
209 } 203 }
210 break; 204 break;
211 case STATE_IN_WORD_ESCAPE: /* FALLTHROUGH */ 205 case STATE_IN_WORD_ESCAPE: /* FALLTHROUGH */
212 case STATE_IN_QUOTE_ESCAPE: 206 case STATE_IN_QUOTE_ESCAPE:
213 if (*p == '\0') { 207 if (*p == '\0') {
216 goto out; 210 goto out;
217 } 211 }
218 /* escaped character */ 212 /* escaped character */
219 state = (state == STATE_IN_WORD_ESCAPE) ? 213 state = (state == STATE_IN_WORD_ESCAPE) ?
220 STATE_IN_WORD : STATE_IN_QUOTE; 214 STATE_IN_WORD : STATE_IN_QUOTE;
221 if (strbuf_appendc(&buf, &buf_size, *p) != 0) { 215 strbuf_appendc(&buf, &buf_size, *p);
222 saved_errno = errno; 216 break;
223 goto out; 217 case STATE_IN_MACRO:
218 switch (*p) {
219 case ' ': /* FALLTHROUGH */
220 case '\t': /* FALLTHROUGH */
221 case '\n': /* FALLTHROUGH */
222 case '\0':
223 /* end of token */
224 name = xstrdup(buf);
225 tokenv[tokenc] = xmalloc(sizeof (union tok));
226 tokenv[tokenc]->macro.type = TOK_MACRO;
227 tokenv[tokenc]->macro.name = name;
228 tokenc++;
229
230 if (*p == '\0') {
231 retval = TOK_ERR_OK;
232 goto out;
233 }
234 state = STATE_INITIAL;
235 break;
236 default:
237 /*
238 * macro names must only contain alphanumeric
239 * characters and underscores
240 */
241 if (!isascii(*p) || (!isalnum(*p) &&
242 (*p != '_'))) {
243 retval = TOK_ERR_INVALID_MACRO_NAME;
244 goto out;
245 }
246 strbuf_appendc(&buf, &buf_size, *p);
224 } 247 }
225 break; 248 break;
226 } 249 }
227 p++; 250 p++;
228 } 251 }
229 252
230 out: 253 out:
231 if (retval < 0) { 254 if (retval < 0) {
232 for (i = 0; i < tokenc; i++) { 255 tok_free(tokenv);
233 free(tokenv[i]);
234 }
235 free(tokenv);
236 } else { 256 } else {
257 tokenv[tokenc] = NULL;
237 *tokencp = tokenc; 258 *tokencp = tokenc;
238 *tokenvp = realloc(tokenv, (tokenc + 1) * sizeof (char *)); 259 *tokenvp = xrealloc(tokenv, (tokenc + 1) *
260 sizeof (union tok *));
239 } 261 }
240 free(buf); 262 free(buf);
241 if (retval < 0) {
242 errno = saved_errno;
243 }
244 263
245 return (retval); 264 return (retval);
246 } 265 }