comparison scanner.go @ 0:6369453d47a3

Initial revision
author Guido Berhoerster <guido+managesieve@berhoerster.name>
date Thu, 15 Oct 2020 09:11:05 +0200
parents
children
comparison
equal deleted inserted replaced
-1:000000000000 0:6369453d47a3
1 // Copyright (C) 2020 Guido Berhoerster <guido+managesieve@berhoerster.name>
2 //
3 // Permission is hereby granted, free of charge, to any person obtaining
4 // a copy of this software and associated documentation files (the
5 // "Software"), to deal in the Software without restriction, including
6 // without limitation the rights to use, copy, modify, merge, publish,
7 // distribute, sublicense, and/or sell copies of the Software, and to
8 // permit persons to whom the Software is furnished to do so, subject to
9 // the following conditions:
10 //
11 // The above copyright notice and this permission notice shall be included
12 // in all copies or substantial portions of the Software.
13 //
14 // THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
15 // EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
16 // MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
17 // IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY
18 // CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
19 // TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
20 // SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
21
22 package managesieve
23
24 import (
25 "bufio"
26 "fmt"
27 "io"
28 "strconv"
29 "strings"
30 "unicode"
31 )
32
// ReadLimit is the number of bytes (1 MiB) used as the initial budget of
// the io.LimitedReader that newScanner places in front of its input.
const ReadLimit = 1 << 20
34
// tokenType identifies the lexical class of a token.
type tokenType int

// Token classes produced by the scanner. tokenInvalid is the zero value of
// tokenType.
const (
	tokenInvalid tokenType = iota
	tokenCRLF
	tokenLeftParenthesis
	tokenRightParenthesis
	tokenAtom
	tokenQuotedString
	tokenLiteralString
)

// token is a single lexical element: its class plus the associated text.
type token struct {
	typ     tokenType
	literal string
}

// String renders the token for diagnostic output. Parentheses and atoms are
// printed verbatim after their class name; line ends and strings are printed
// Go-quoted so that control characters remain visible. A token whose type is
// not one of the declared classes is reported as "unknown token".
func (t token) String() string {
	if t.typ == tokenInvalid {
		return "Invalid"
	}

	// Classes whose literal is appended as-is.
	var prefix string
	switch t.typ {
	case tokenLeftParenthesis:
		prefix = "LeftParenthesis: "
	case tokenRightParenthesis:
		prefix = "RightParenthesis: "
	case tokenAtom:
		prefix = "Atom: "
	}
	if prefix != "" {
		return prefix + t.literal
	}

	// Everything else is quoted; unrecognised types keep the fallback format.
	format := "unknown token: %q"
	switch t.typ {
	case tokenCRLF:
		format = "CRLF: %q"
	case tokenQuotedString:
		format = "QuotedString: %q"
	case tokenLiteralString:
		format = "LiteralString: %q"
	}
	return fmt.Sprintf(format, t.literal)
}
71
72 type scanner struct {
73 lr *io.LimitedReader // do not read from this, only for access to N
74 br *bufio.Reader // wraps LimitReader
75 }
76
77 func newScanner(r io.Reader) *scanner {
78 lr := &io.LimitedReader{R: r, N: ReadLimit}
79 br := bufio.NewReader(lr)
80 return &scanner{lr, br}
81 }
82
83 func (s *scanner) scanCRLF() (*token, error) {
84 c, _, err := s.br.ReadRune()
85 if err != nil {
86 if err == io.EOF {
87 err = io.ErrUnexpectedEOF
88 }
89 return nil, err
90 }
91 literal := string(c)
92 // accept LF without CR
93 if c == '\r' {
94 c, _, err = s.br.ReadRune()
95 if err != nil {
96 if err == io.EOF {
97 err = io.ErrUnexpectedEOF
98 }
99 return nil, err
100 }
101 literal += string(c)
102 }
103 if c != '\n' {
104 return nil, ParserError(fmt.Sprintf(`expected '\n', got %q`, c))
105 }
106 return &token{typ: tokenCRLF, literal: literal}, nil
107 }
108
109 func (s *scanner) scanParenthesis() (*token, error) {
110 c, _, err := s.br.ReadRune()
111 if err != nil {
112 if err == io.EOF {
113 err = io.ErrUnexpectedEOF
114 }
115 return nil, err
116 }
117 var typ tokenType
118 if c == '(' {
119 typ = tokenLeftParenthesis
120 } else if c == ')' {
121 typ = tokenRightParenthesis
122 } else {
123 return nil,
124 ParserError(fmt.Sprintf("expected parenthesis, got %q",
125 c))
126 }
127 return &token{typ: typ, literal: string(c)}, nil
128 }
129
// isAtomRune reports whether c may appear in an atom: any character in the
// range 0x21-0x7e except the double quote, the parentheses, the backslash
// and the opening curly brace.
func isAtomRune(c rune) bool {
	if c < 0x21 || c > 0x7e {
		return false
	}
	switch c {
	case '"', '(', ')', '\\', '{':
		return false
	}
	return true
}
137
138 func (s *scanner) scanAtom() (*token, error) {
139 var sb strings.Builder
140 var c rune
141 for {
142 c, _, err := s.br.ReadRune()
143 if err != nil {
144 if err == io.EOF {
145 err = io.ErrUnexpectedEOF
146 }
147 return nil, err
148 }
149 if isAtomRune(c) {
150 sb.WriteRune(unicode.ToUpper(c))
151 } else {
152 s.br.UnreadRune()
153 break
154 }
155 }
156 if sb.Len() == 0 {
157 return nil, ParserError(fmt.Sprintf("expected atom, got %q", c))
158 }
159 return &token{typ: tokenAtom, literal: sb.String()}, nil
160 }
161
162 func (s *scanner) scanQuotedString() (*token, error) {
163 c, _, err := s.br.ReadRune()
164 if err != nil {
165 if err == io.EOF {
166 err = io.ErrUnexpectedEOF
167 }
168 return nil, err
169 }
170 if c != '"' {
171 return nil, ParserError(fmt.Sprintf("expected '\"', got %q", c))
172 }
173 qs, err := s.br.ReadString('"')
174 if err != nil {
175 if err == io.EOF {
176 err = io.ErrUnexpectedEOF
177 }
178 return nil, err
179 }
180 return &token{typ: tokenQuotedString, literal: qs[:len(qs)-1]},
181 nil
182 }
183
184 func (s *scanner) scanLiteralString() (*token, error) {
185 c, _, err := s.br.ReadRune()
186 if err != nil {
187 if err == io.EOF {
188 err = io.ErrUnexpectedEOF
189 }
190 return nil, err
191 }
192 if c != '{' {
193 return nil, ParserError(fmt.Sprintf("expected '{', got %q", c))
194 }
195 nstr, err := s.br.ReadString('}')
196 if err != nil {
197 if err == io.EOF {
198 err = io.ErrUnexpectedEOF
199 }
200 return nil, err
201 }
202 n, err := strconv.ParseUint(nstr[:len(nstr)-1], 10, 32)
203 if err != nil {
204 return nil, ParserError("failed to parse literal string length: " + err.Error())
205 }
206 if n > uint64(s.lr.N) {
207 return nil, ParserError(fmt.Sprintf("string too long: %d", n))
208 }
209
210 if _, err := s.scanCRLF(); err != nil {
211 return nil, err
212 }
213
214 b := make([]byte, n)
215 _, err = io.ReadFull(s.br, b)
216 ls := string(b)
217 if err != nil {
218 if err == io.EOF {
219 err = io.ErrUnexpectedEOF
220 }
221 return nil, err
222 }
223
224 return &token{typ: tokenLiteralString, literal: ls}, nil
225 }
226
227 func (s *scanner) skipSpace() error {
228 for {
229 b, err := s.br.ReadByte()
230 if err != nil {
231 if err == io.EOF {
232 err = io.ErrUnexpectedEOF
233 }
234 return err
235 }
236
237 if b != ' ' {
238 s.br.UnreadByte()
239 break
240 }
241 }
242
243 return nil
244 }
245
246 func (s *scanner) scan() (*token, error) {
247 if err := s.skipSpace(); err != nil {
248 return nil, err
249 }
250
251 buf, err := s.br.Peek(1)
252 if err != nil {
253 if err == io.EOF {
254 err = io.ErrUnexpectedEOF
255 }
256 return nil, err
257 }
258 b := buf[0]
259 switch {
260 case b == '\r':
261 fallthrough
262 case b == '\n':
263 return s.scanCRLF()
264 case b == '"':
265 return s.scanQuotedString()
266 case b == '{':
267 return s.scanLiteralString()
268 case b == '(':
269 fallthrough
270 case b == ')':
271 return s.scanParenthesis()
272 case isAtomRune(rune(b)):
273 return s.scanAtom()
274 }
275 return nil, ParserError(fmt.Sprintf("invalid character: %q", b))
276 }