diff scanner.go @ 0:6369453d47a3

Initial revision
author Guido Berhoerster <guido+managesieve@berhoerster.name>
date Thu, 15 Oct 2020 09:11:05 +0200
parents
children
line wrap: on
line diff
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/scanner.go	Thu Oct 15 09:11:05 2020 +0200
@@ -0,0 +1,276 @@
+// Copyright (C) 2020 Guido Berhoerster <guido+managesieve@berhoerster.name>
+//
+// Permission is hereby granted, free of charge, to any person obtaining
+// a copy of this software and associated documentation files (the
+// "Software"), to deal in the Software without restriction, including
+// without limitation the rights to use, copy, modify, merge, publish,
+// distribute, sublicense, and/or sell copies of the Software, and to
+// permit persons to whom the Software is furnished to do so, subject to
+// the following conditions:
+//
+// The above copyright notice and this permission notice shall be included
+// in all copies or substantial portions of the Software.
+//
+// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+// EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+// MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
+// IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY
+// CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
+// TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
+// SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+
+package managesieve
+
+import (
+	"bufio"
+	"fmt"
+	"io"
+	"strconv"
+	"strings"
+	"unicode"
+)
+
+// ReadLimit is the byte budget that newScanner assigns to the
+// io.LimitedReader wrapped around a scanner's input.
+const ReadLimit = 1 * 1024 * 1024 // 1 MiB
+
+// tokenType identifies the lexical category of a token.
+type tokenType int
+
+// Token categories. tokenInvalid comes first and is therefore the zero
+// value of tokenType.
+const (
+	tokenInvalid tokenType = iota
+	// tokenCRLF is a line terminator: "\r\n" or a bare "\n".
+	tokenCRLF
+	// tokenLeftParenthesis is a single '('.
+	tokenLeftParenthesis
+	// tokenRightParenthesis is a single ')'.
+	tokenRightParenthesis
+	// tokenAtom is a run of atom characters (see isAtomRune).
+	tokenAtom
+	// tokenQuotedString is a string enclosed in double quotes.
+	tokenQuotedString
+	// tokenLiteralString is a "{n}" length prefix, a line terminator and
+	// n bytes of content.
+	tokenLiteralString
+)
+
+// token is a single lexical unit returned by the scanner.
+type token struct {
+	typ     tokenType // category of the token
+	literal string    // token text as stored by the scan* method that built it
+}
+
+func (t token) String() string {
+	switch t.typ {
+	case tokenInvalid:
+		return "Invalid"
+	case tokenLeftParenthesis:
+		return "LeftParenthesis: " + t.literal
+	case tokenRightParenthesis:
+		return "RightParenthesis: " + t.literal
+	case tokenAtom:
+		return "Atom: " + t.literal
+	case tokenCRLF:
+		return fmt.Sprintf("CRLF: %q", t.literal)
+	case tokenQuotedString:
+		return fmt.Sprintf("QuotedString: %q", t.literal)
+	case tokenLiteralString:
+		return fmt.Sprintf("LiteralString: %q", t.literal)
+	}
+	return fmt.Sprintf("unknown token: %q", t.literal)
+}
+
+// scanner splits the input read through br into tokens.
+type scanner struct {
+	lr *io.LimitedReader // do not read from this, only for access to N
+	br *bufio.Reader     // wraps lr
+}
+
+// newScanner returns a scanner that tokenizes r. The input is wrapped in an
+// io.LimitedReader initialised to ReadLimit bytes, which in turn is read
+// through a bufio.Reader.
+func newScanner(r io.Reader) *scanner {
+	limited := &io.LimitedReader{R: r, N: ReadLimit}
+	return &scanner{
+		lr: limited,
+		br: bufio.NewReader(limited),
+	}
+}
+
+// scanCRLF consumes a line terminator and returns it as a tokenCRLF token
+// whose literal holds exactly the runes that were consumed. Both "\r\n"
+// and a bare "\n" are accepted. Any other input yields a ParserError, and
+// running out of input is reported as io.ErrUnexpectedEOF.
+func (s *scanner) scanCRLF() (*token, error) {
+	first, _, err := s.br.ReadRune()
+	if err == io.EOF {
+		return nil, io.ErrUnexpectedEOF
+	}
+	if err != nil {
+		return nil, err
+	}
+
+	text := string(first)
+	last := first
+	if first == '\r' {
+		// A CR has to be followed by the LF that ends the line.
+		last, _, err = s.br.ReadRune()
+		if err == io.EOF {
+			return nil, io.ErrUnexpectedEOF
+		}
+		if err != nil {
+			return nil, err
+		}
+		text += string(last)
+	}
+
+	if last == '\n' {
+		return &token{typ: tokenCRLF, literal: text}, nil
+	}
+	return nil, ParserError(fmt.Sprintf(`expected '\n', got %q`, last))
+}
+
+func (s *scanner) scanParenthesis() (*token, error) {
+	c, _, err := s.br.ReadRune()
+	if err != nil {
+		if err == io.EOF {
+			err = io.ErrUnexpectedEOF
+		}
+		return nil, err
+	}
+	var typ tokenType
+	if c == '(' {
+		typ = tokenLeftParenthesis
+	} else if c == ')' {
+		typ = tokenRightParenthesis
+	} else {
+		return nil,
+			ParserError(fmt.Sprintf("expected parenthesis, got %q",
+				c))
+	}
+	return &token{typ: typ, literal: string(c)}, nil
+}
+
+// isAtomRune reports whether c may appear in an atom: any character in the
+// range 0x21..0x7e except '"', '(', ')', '\' and '{'.
+func isAtomRune(c rune) bool {
+	if c < 0x21 || c > 0x7e {
+		return false
+	}
+	switch c {
+	case '"', '(', ')', '\\', '{':
+		return false
+	}
+	return true
+}
+
+// scanAtom consumes the longest run of atom characters (see isAtomRune) and
+// returns it, converted to upper case, as a tokenAtom token. The first
+// rune that is not an atom character is pushed back so that the next scan
+// sees it again.
+//
+// A ParserError naming the offending rune is returned when the input does
+// not start with an atom character. Running out of input, even after some
+// atom characters were read, is reported as io.ErrUnexpectedEOF.
+func (s *scanner) scanAtom() (*token, error) {
+	var sb strings.Builder
+	for {
+		c, _, err := s.br.ReadRune()
+		if err != nil {
+			if err == io.EOF {
+				err = io.ErrUnexpectedEOF
+			}
+			return nil, err
+		}
+		if isAtomRune(c) {
+			sb.WriteRune(unicode.ToUpper(c))
+			continue
+		}
+
+		// c ends the atom and belongs to the following token.
+		if err := s.br.UnreadRune(); err != nil {
+			return nil, err
+		}
+		if sb.Len() == 0 {
+			// Report the rune that was actually read; c is only in
+			// scope inside the loop, so the check has to happen here.
+			return nil, ParserError(fmt.Sprintf("expected atom, got %q", c))
+		}
+		return &token{typ: tokenAtom, literal: sb.String()}, nil
+	}
+}
+
+func (s *scanner) scanQuotedString() (*token, error) {
+	c, _, err := s.br.ReadRune()
+	if err != nil {
+		if err == io.EOF {
+			err = io.ErrUnexpectedEOF
+		}
+		return nil, err
+	}
+	if c != '"' {
+		return nil, ParserError(fmt.Sprintf("expected '\"', got %q", c))
+	}
+	qs, err := s.br.ReadString('"')
+	if err != nil {
+		if err == io.EOF {
+			err = io.ErrUnexpectedEOF
+		}
+		return nil, err
+	}
+	return &token{typ: tokenQuotedString, literal: qs[:len(qs)-1]},
+		nil
+}
+
+// scanLiteralString consumes a literal string of the form "{n}", a line
+// terminator and exactly n bytes of content, and returns the content as a
+// tokenLiteralString token.
+//
+// A ParserError is returned when the input does not start with '{', when n
+// is not a decimal number that fits into 32 bits, or when n exceeds the
+// number of bytes that can still be read under the scanner's read limit.
+// Running out of input is reported as io.ErrUnexpectedEOF.
+func (s *scanner) scanLiteralString() (*token, error) {
+	c, _, err := s.br.ReadRune()
+	if err != nil {
+		if err == io.EOF {
+			err = io.ErrUnexpectedEOF
+		}
+		return nil, err
+	}
+	if c != '{' {
+		return nil, ParserError(fmt.Sprintf("expected '{', got %q", c))
+	}
+	nstr, err := s.br.ReadString('}')
+	if err != nil {
+		if err == io.EOF {
+			err = io.ErrUnexpectedEOF
+		}
+		return nil, err
+	}
+	// ReadString succeeded, so nstr ends with the '}' delimiter.
+	n, err := strconv.ParseUint(nstr[:len(nstr)-1], 10, 32)
+	if err != nil {
+		return nil, ParserError("failed to parse literal string length: " + err.Error())
+	}
+	// lr.N only counts bytes that have not been handed to br yet. Bytes br
+	// already holds in its buffer were deducted from N when they were
+	// prefetched but are still available to satisfy this literal, so both
+	// have to be added up to get the remaining budget.
+	avail := s.lr.N + int64(s.br.Buffered())
+	if avail < 0 || n > uint64(avail) {
+		return nil, ParserError(fmt.Sprintf("string too long: %d", n))
+	}
+
+	if _, err := s.scanCRLF(); err != nil {
+		return nil, err
+	}
+
+	b := make([]byte, n)
+	if _, err := io.ReadFull(s.br, b); err != nil {
+		if err == io.EOF {
+			err = io.ErrUnexpectedEOF
+		}
+		return nil, err
+	}
+
+	return &token{typ: tokenLiteralString, literal: string(b)}, nil
+}
+
+// skipSpace consumes any run of ' ' characters and leaves the first byte
+// that is not a space unread. Running out of input is reported as
+// io.ErrUnexpectedEOF.
+func (s *scanner) skipSpace() error {
+	for {
+		ch, err := s.br.ReadByte()
+		switch {
+		case err == io.EOF:
+			return io.ErrUnexpectedEOF
+		case err != nil:
+			return err
+		case ch != ' ':
+			// Push the byte back for whoever reads next.
+			s.br.UnreadByte()
+			return nil
+		}
+	}
+}
+
+// scan skips leading spaces and returns the next token. It peeks at the
+// first byte to decide which of the specialised scan methods should
+// consume the token. A byte that cannot start any token yields a
+// ParserError; running out of input is reported as io.ErrUnexpectedEOF.
+func (s *scanner) scan() (*token, error) {
+	if err := s.skipSpace(); err != nil {
+		return nil, err
+	}
+
+	next, err := s.br.Peek(1)
+	if err == io.EOF {
+		return nil, io.ErrUnexpectedEOF
+	}
+	if err != nil {
+		return nil, err
+	}
+
+	switch first := next[0]; first {
+	case '\r', '\n':
+		return s.scanCRLF()
+	case '"':
+		return s.scanQuotedString()
+	case '{':
+		return s.scanLiteralString()
+	case '(', ')':
+		return s.scanParenthesis()
+	default:
+		if isAtomRune(rune(first)) {
+			return s.scanAtom()
+		}
+		return nil, ParserError(fmt.Sprintf("invalid character: %q", first))
+	}
+}