Mercurial > projects > managesieve
comparison scanner.go @ 0:6369453d47a3
Initial revision
author | Guido Berhoerster <guido+managesieve@berhoerster.name> |
---|---|
date | Thu, 15 Oct 2020 09:11:05 +0200 |
parents | |
children |
comparison
equal
deleted
inserted
replaced
-1:000000000000 | 0:6369453d47a3 |
---|---|
1 // Copyright (C) 2020 Guido Berhoerster <guido+managesieve@berhoerster.name> | |
2 // | |
3 // Permission is hereby granted, free of charge, to any person obtaining | |
4 // a copy of this software and associated documentation files (the | |
5 // "Software"), to deal in the Software without restriction, including | |
6 // without limitation the rights to use, copy, modify, merge, publish, | |
7 // distribute, sublicense, and/or sell copies of the Software, and to | |
8 // permit persons to whom the Software is furnished to do so, subject to | |
9 // the following conditions: | |
10 // | |
11 // The above copyright notice and this permission notice shall be included | |
12 // in all copies or substantial portions of the Software. | |
13 // | |
14 // THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, | |
15 // EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF | |
16 // MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. | |
17 // IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY | |
18 // CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, | |
19 // TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE | |
20 // SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. | |
21 | |
22 package managesieve | |
23 | |
24 import ( | |
25 "bufio" | |
26 "fmt" | |
27 "io" | |
28 "strconv" | |
29 "strings" | |
30 "unicode" | |
31 ) | |
32 | |
// ReadLimit is the byte budget (1 MiB) that newScanner assigns to the
// io.LimitedReader it places in front of its input.
const ReadLimit = 1 << 20
34 | |
// tokenType identifies the lexical class of a token.
type tokenType int

// Lexical classes. tokenInvalid is the zero value of tokenType.
const (
	tokenInvalid tokenType = iota
	tokenCRLF
	tokenLeftParenthesis
	tokenRightParenthesis
	tokenAtom
	tokenQuotedString
	tokenLiteralString
)

// token is one lexical element: its class together with the text it carries.
type token struct {
	typ     tokenType
	literal string
}

// String returns a human-readable description of the token. Parentheses and
// atoms show their literal verbatim; line terminators, strings and tokens of
// an unrecognized type show it Go-quoted so control characters stay visible.
// An invalid token is rendered without its literal.
func (t token) String() string {
	var label string
	verbatim := false // true when the literal is printed without quoting
	switch t.typ {
	case tokenInvalid:
		return "Invalid"
	case tokenLeftParenthesis:
		label, verbatim = "LeftParenthesis", true
	case tokenRightParenthesis:
		label, verbatim = "RightParenthesis", true
	case tokenAtom:
		label, verbatim = "Atom", true
	case tokenCRLF:
		label = "CRLF"
	case tokenQuotedString:
		label = "QuotedString"
	case tokenLiteralString:
		label = "LiteralString"
	default:
		label = "unknown token"
	}
	if verbatim {
		return label + ": " + t.literal
	}
	return fmt.Sprintf("%s: %q", label, t.literal)
}
71 | |
72 type scanner struct { | |
73 lr *io.LimitedReader // do not read from this, only for access to N | |
74 br *bufio.Reader // wraps LimitReader | |
75 } | |
76 | |
77 func newScanner(r io.Reader) *scanner { | |
78 lr := &io.LimitedReader{R: r, N: ReadLimit} | |
79 br := bufio.NewReader(lr) | |
80 return &scanner{lr, br} | |
81 } | |
82 | |
83 func (s *scanner) scanCRLF() (*token, error) { | |
84 c, _, err := s.br.ReadRune() | |
85 if err != nil { | |
86 if err == io.EOF { | |
87 err = io.ErrUnexpectedEOF | |
88 } | |
89 return nil, err | |
90 } | |
91 literal := string(c) | |
92 // accept LF without CR | |
93 if c == '\r' { | |
94 c, _, err = s.br.ReadRune() | |
95 if err != nil { | |
96 if err == io.EOF { | |
97 err = io.ErrUnexpectedEOF | |
98 } | |
99 return nil, err | |
100 } | |
101 literal += string(c) | |
102 } | |
103 if c != '\n' { | |
104 return nil, ParserError(fmt.Sprintf(`expected '\n', got %q`, c)) | |
105 } | |
106 return &token{typ: tokenCRLF, literal: literal}, nil | |
107 } | |
108 | |
109 func (s *scanner) scanParenthesis() (*token, error) { | |
110 c, _, err := s.br.ReadRune() | |
111 if err != nil { | |
112 if err == io.EOF { | |
113 err = io.ErrUnexpectedEOF | |
114 } | |
115 return nil, err | |
116 } | |
117 var typ tokenType | |
118 if c == '(' { | |
119 typ = tokenLeftParenthesis | |
120 } else if c == ')' { | |
121 typ = tokenRightParenthesis | |
122 } else { | |
123 return nil, | |
124 ParserError(fmt.Sprintf("expected parenthesis, got %q", | |
125 c)) | |
126 } | |
127 return &token{typ: typ, literal: string(c)}, nil | |
128 } | |
129 | |
// isAtomRune reports whether c may appear in an atom: any printable ASCII
// character from '!' through '~' except '"', '(', ')', '\' and '{'.
func isAtomRune(c rune) bool {
	if c < '!' || c > '~' {
		return false
	}
	switch c {
	case '"', '(', ')', '\\', '{':
		return false
	}
	return true
}
137 | |
138 func (s *scanner) scanAtom() (*token, error) { | |
139 var sb strings.Builder | |
140 var c rune | |
141 for { | |
142 c, _, err := s.br.ReadRune() | |
143 if err != nil { | |
144 if err == io.EOF { | |
145 err = io.ErrUnexpectedEOF | |
146 } | |
147 return nil, err | |
148 } | |
149 if isAtomRune(c) { | |
150 sb.WriteRune(unicode.ToUpper(c)) | |
151 } else { | |
152 s.br.UnreadRune() | |
153 break | |
154 } | |
155 } | |
156 if sb.Len() == 0 { | |
157 return nil, ParserError(fmt.Sprintf("expected atom, got %q", c)) | |
158 } | |
159 return &token{typ: tokenAtom, literal: sb.String()}, nil | |
160 } | |
161 | |
162 func (s *scanner) scanQuotedString() (*token, error) { | |
163 c, _, err := s.br.ReadRune() | |
164 if err != nil { | |
165 if err == io.EOF { | |
166 err = io.ErrUnexpectedEOF | |
167 } | |
168 return nil, err | |
169 } | |
170 if c != '"' { | |
171 return nil, ParserError(fmt.Sprintf("expected '\"', got %q", c)) | |
172 } | |
173 qs, err := s.br.ReadString('"') | |
174 if err != nil { | |
175 if err == io.EOF { | |
176 err = io.ErrUnexpectedEOF | |
177 } | |
178 return nil, err | |
179 } | |
180 return &token{typ: tokenQuotedString, literal: qs[:len(qs)-1]}, | |
181 nil | |
182 } | |
183 | |
184 func (s *scanner) scanLiteralString() (*token, error) { | |
185 c, _, err := s.br.ReadRune() | |
186 if err != nil { | |
187 if err == io.EOF { | |
188 err = io.ErrUnexpectedEOF | |
189 } | |
190 return nil, err | |
191 } | |
192 if c != '{' { | |
193 return nil, ParserError(fmt.Sprintf("expected '{', got %q", c)) | |
194 } | |
195 nstr, err := s.br.ReadString('}') | |
196 if err != nil { | |
197 if err == io.EOF { | |
198 err = io.ErrUnexpectedEOF | |
199 } | |
200 return nil, err | |
201 } | |
202 n, err := strconv.ParseUint(nstr[:len(nstr)-1], 10, 32) | |
203 if err != nil { | |
204 return nil, ParserError("failed to parse literal string length: " + err.Error()) | |
205 } | |
206 if n > uint64(s.lr.N) { | |
207 return nil, ParserError(fmt.Sprintf("string too long: %d", n)) | |
208 } | |
209 | |
210 if _, err := s.scanCRLF(); err != nil { | |
211 return nil, err | |
212 } | |
213 | |
214 b := make([]byte, n) | |
215 _, err = io.ReadFull(s.br, b) | |
216 ls := string(b) | |
217 if err != nil { | |
218 if err == io.EOF { | |
219 err = io.ErrUnexpectedEOF | |
220 } | |
221 return nil, err | |
222 } | |
223 | |
224 return &token{typ: tokenLiteralString, literal: ls}, nil | |
225 } | |
226 | |
227 func (s *scanner) skipSpace() error { | |
228 for { | |
229 b, err := s.br.ReadByte() | |
230 if err != nil { | |
231 if err == io.EOF { | |
232 err = io.ErrUnexpectedEOF | |
233 } | |
234 return err | |
235 } | |
236 | |
237 if b != ' ' { | |
238 s.br.UnreadByte() | |
239 break | |
240 } | |
241 } | |
242 | |
243 return nil | |
244 } | |
245 | |
246 func (s *scanner) scan() (*token, error) { | |
247 if err := s.skipSpace(); err != nil { | |
248 return nil, err | |
249 } | |
250 | |
251 buf, err := s.br.Peek(1) | |
252 if err != nil { | |
253 if err == io.EOF { | |
254 err = io.ErrUnexpectedEOF | |
255 } | |
256 return nil, err | |
257 } | |
258 b := buf[0] | |
259 switch { | |
260 case b == '\r': | |
261 fallthrough | |
262 case b == '\n': | |
263 return s.scanCRLF() | |
264 case b == '"': | |
265 return s.scanQuotedString() | |
266 case b == '{': | |
267 return s.scanLiteralString() | |
268 case b == '(': | |
269 fallthrough | |
270 case b == ')': | |
271 return s.scanParenthesis() | |
272 case isAtomRune(rune(b)): | |
273 return s.scanAtom() | |
274 } | |
275 return nil, ParserError(fmt.Sprintf("invalid character: %q", b)) | |
276 } |