view scanner.go @ 10:8cd45453f5be

Added tag version-0.8.1, v0.8.1 for changeset c2d5f9cfbb5a
author Guido Berhoerster <guido+managesieve@berhoerster.name>
date Wed, 03 Feb 2021 13:24:31 +0100
parents 6369453d47a3
children
line wrap: on
line source

// Copyright (C) 2020 Guido Berhoerster <guido+managesieve@berhoerster.name>
//
// Permission is hereby granted, free of charge, to any person obtaining
// a copy of this software and associated documentation files (the
// "Software"), to deal in the Software without restriction, including
// without limitation the rights to use, copy, modify, merge, publish,
// distribute, sublicense, and/or sell copies of the Software, and to
// permit persons to whom the Software is furnished to do so, subject to
// the following conditions:
//
// The above copyright notice and this permission notice shall be included
// in all copies or substantial portions of the Software.
//
// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
// EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
// MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
// IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY
// CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
// TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
// SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.

package managesieve

import (
	"bufio"
	"fmt"
	"io"
	"strconv"
	"strings"
	"unicode"
)

// ReadLimit is the number of bytes (1 MiB) that newScanner allows to be read
// from the underlying reader.
const ReadLimit = 1 * 1024 * 1024 // 1 MiB

// tokenType identifies the lexical category of a token.
type tokenType int

// Token categories. tokenInvalid is the zero value of tokenType; each of the
// remaining values is produced by the scanner method of the corresponding
// name (scanCRLF, scanParenthesis, scanAtom, scanQuotedString and
// scanLiteralString).
const (
	tokenInvalid tokenType = iota
	tokenCRLF
	tokenLeftParenthesis
	tokenRightParenthesis
	tokenAtom
	tokenQuotedString
	tokenLiteralString
)

// token is a single lexical element produced by the scanner: typ is its
// category and literal is the text that was read for it. Atoms are stored
// upper-cased; quoted and literal strings hold the string contents without
// the surrounding delimiters.
type token struct {
	typ     tokenType
	literal string
}

// String renders the token as "<Category>: <literal>" for debugging output.
// Parentheses and atoms show their literal verbatim, every other category
// (including unknown ones) shows it Go-quoted, and an invalid token is
// rendered as just "Invalid".
func (t token) String() string {
	var (
		label    string
		verbatim bool
	)
	switch t.typ {
	case tokenInvalid:
		return "Invalid"
	case tokenLeftParenthesis:
		label, verbatim = "LeftParenthesis", true
	case tokenRightParenthesis:
		label, verbatim = "RightParenthesis", true
	case tokenAtom:
		label, verbatim = "Atom", true
	case tokenCRLF:
		label = "CRLF"
	case tokenQuotedString:
		label = "QuotedString"
	case tokenLiteralString:
		label = "LiteralString"
	default:
		label = "unknown token"
	}
	if verbatim {
		return label + ": " + t.literal
	}
	return fmt.Sprintf("%s: %q", label, t.literal)
}

// scanner splits its input into tokens. All reads go through br, which
// buffers lr; lr caps the number of bytes taken from the underlying reader.
type scanner struct {
	lr *io.LimitedReader // do not read from this, only for access to N
	br *bufio.Reader     // wraps lr
}

// newScanner returns a scanner that reads from r through a buffered reader,
// with the number of bytes taken from r capped at ReadLimit.
func newScanner(r io.Reader) *scanner {
	limited := &io.LimitedReader{R: r, N: ReadLimit}
	return &scanner{
		lr: limited,
		br: bufio.NewReader(limited),
	}
}

// scanCRLF reads a line terminator and returns it as a tokenCRLF whose
// literal is the exact text consumed. Both "\r\n" and a bare "\n" are
// accepted. Anything else yields a ParserError, and running out of input
// yields io.ErrUnexpectedEOF.
func (s *scanner) scanCRLF() (*token, error) {
	// next reads one rune, reporting end of input as an unexpected EOF.
	next := func() (rune, error) {
		r, _, err := s.br.ReadRune()
		if err == io.EOF {
			err = io.ErrUnexpectedEOF
		}
		return r, err
	}

	first, err := next()
	if err != nil {
		return nil, err
	}

	last, text := first, string(first)
	if first == '\r' {
		// a CR must be followed by the LF checked below
		if last, err = next(); err != nil {
			return nil, err
		}
		text += string(last)
	}

	if last != '\n' {
		return nil, ParserError(fmt.Sprintf(`expected '\n', got %q`, last))
	}
	return &token{typ: tokenCRLF, literal: text}, nil
}

// scanParenthesis reads a single "(" or ")" and returns the matching
// parenthesis token. Any other rune yields a ParserError, and running out of
// input yields io.ErrUnexpectedEOF.
func (s *scanner) scanParenthesis() (*token, error) {
	r, _, err := s.br.ReadRune()
	switch {
	case err == io.EOF:
		return nil, io.ErrUnexpectedEOF
	case err != nil:
		return nil, err
	}

	switch r {
	case '(':
		return &token{typ: tokenLeftParenthesis, literal: "("}, nil
	case ')':
		return &token{typ: tokenRightParenthesis, literal: ")"}, nil
	}
	return nil, ParserError(fmt.Sprintf("expected parenthesis, got %q", r))
}

// isAtomRune reports whether c may appear in an atom: any printable,
// non-space ASCII character (0x21-0x7e) other than the double quote, the
// parentheses, the backslash and the opening brace.
func isAtomRune(c rune) bool {
	if c < 0x21 || c > 0x7e {
		return false
	}
	switch c {
	case '"', '(', ')', '\\', '{':
		return false
	}
	return true
}

// scanAtom reads the longest run of atom runes (see isAtomRune) and returns
// it, converted to upper case, as a tokenAtom. The first rune that does not
// belong to the atom is pushed back so the next scan sees it.
//
// A ParserError naming the offending rune is returned when no atom rune is
// found. Running out of input, even after at least one atom rune has been
// read, yields io.ErrUnexpectedEOF.
func (s *scanner) scanAtom() (*token, error) {
	var sb strings.Builder
	// c is declared outside the loop and assigned with "=" below so that
	// the error message after the loop reports the rune that was actually
	// read rather than the zero value of a shadowed variable.
	var c rune
	for {
		var err error
		c, _, err = s.br.ReadRune()
		if err != nil {
			if err == io.EOF {
				err = io.ErrUnexpectedEOF
			}
			return nil, err
		}
		if !isAtomRune(c) {
			if err := s.br.UnreadRune(); err != nil {
				return nil, err
			}
			break
		}
		sb.WriteRune(unicode.ToUpper(c))
	}
	if sb.Len() == 0 {
		return nil, ParserError(fmt.Sprintf("expected atom, got %q", c))
	}
	return &token{typ: tokenAtom, literal: sb.String()}, nil
}

// scanQuotedString reads a double-quoted string and returns its contents,
// without the surrounding quotes, as a tokenQuotedString.
//
// A backslash escapes the byte that follows it, so \" and \\ produce a
// literal quote and backslash instead of ending the string early. A
// backslash in front of any other byte is dropped and that byte is kept.
//
// A ParserError is returned when the input does not start with a double
// quote. Running out of input before the closing quote yields
// io.ErrUnexpectedEOF.
func (s *scanner) scanQuotedString() (*token, error) {
	c, _, err := s.br.ReadRune()
	if err != nil {
		if err == io.EOF {
			err = io.ErrUnexpectedEOF
		}
		return nil, err
	}
	if c != '"' {
		return nil, ParserError(fmt.Sprintf("expected '\"', got %q", c))
	}

	// Working on bytes is safe here: '"' and '\\' are ASCII and therefore
	// never occur inside a multi-byte UTF-8 sequence.
	var sb strings.Builder
	for {
		b, err := s.br.ReadByte()
		if err != nil {
			if err == io.EOF {
				err = io.ErrUnexpectedEOF
			}
			return nil, err
		}
		switch b {
		case '"':
			return &token{typ: tokenQuotedString, literal: sb.String()}, nil
		case '\\':
			// take the escaped byte verbatim
			b, err = s.br.ReadByte()
			if err != nil {
				if err == io.EOF {
					err = io.ErrUnexpectedEOF
				}
				return nil, err
			}
		}
		sb.WriteByte(b)
	}
}

// scanLiteralString reads a literal string of the form "{n}", a line
// terminator and then exactly n bytes, and returns those bytes as a
// tokenLiteralString.
//
// A ParserError is returned when the input does not start with "{", when n
// is not a valid unsigned decimal number, or when n exceeds the number of
// bytes that can still be read within the scanner's read limit. Running out
// of input yields io.ErrUnexpectedEOF.
func (s *scanner) scanLiteralString() (*token, error) {
	c, _, err := s.br.ReadRune()
	if err != nil {
		if err == io.EOF {
			err = io.ErrUnexpectedEOF
		}
		return nil, err
	}
	if c != '{' {
		return nil, ParserError(fmt.Sprintf("expected '{', got %q", c))
	}
	nstr, err := s.br.ReadString('}')
	if err != nil {
		if err == io.EOF {
			err = io.ErrUnexpectedEOF
		}
		return nil, err
	}
	// nstr includes the closing brace
	n, err := strconv.ParseUint(nstr[:len(nstr)-1], 10, 32)
	if err != nil {
		return nil, ParserError("failed to parse literal string length: " + err.Error())
	}

	// The bytes still obtainable are those already buffered plus whatever
	// the limited reader may still hand out. Looking at lr.N alone would
	// reject strings whose data has already been read into the buffer.
	avail := int64(s.br.Buffered())
	if s.lr.N > 0 {
		avail += s.lr.N
	}
	if n > uint64(avail) {
		return nil, ParserError(fmt.Sprintf("string too long: %d", n))
	}

	if _, err := s.scanCRLF(); err != nil {
		return nil, err
	}

	b := make([]byte, n)
	if _, err := io.ReadFull(s.br, b); err != nil {
		if err == io.EOF {
			err = io.ErrUnexpectedEOF
		}
		return nil, err
	}

	return &token{typ: tokenLiteralString, literal: string(b)}, nil
}

// skipSpace consumes consecutive space characters (' ' only) and leaves the
// first other byte unread. Running out of input before such a byte is found
// yields io.ErrUnexpectedEOF.
func (s *scanner) skipSpace() error {
	for {
		ch, err := s.br.ReadByte()
		switch {
		case err == io.EOF:
			return io.ErrUnexpectedEOF
		case err != nil:
			return err
		case ch != ' ':
			// not ours: push it back for the caller
			s.br.UnreadByte()
			return nil
		}
	}
}

// scan skips leading spaces and returns the next token. The kind of token is
// chosen from the first byte, which is only peeked at so that the dedicated
// scan method can consume it itself:
//
//	'\r' or '\n'  line terminator
//	'"'           quoted string
//	'{'           literal string
//	'(' or ')'    parenthesis
//	atom rune     atom
//
// Any other byte yields a ParserError, and running out of input yields
// io.ErrUnexpectedEOF.
func (s *scanner) scan() (*token, error) {
	if err := s.skipSpace(); err != nil {
		return nil, err
	}

	head, err := s.br.Peek(1)
	if err == io.EOF {
		err = io.ErrUnexpectedEOF
	}
	if err != nil {
		return nil, err
	}

	switch first := head[0]; first {
	case '\r', '\n':
		return s.scanCRLF()
	case '"':
		return s.scanQuotedString()
	case '{':
		return s.scanLiteralString()
	case '(', ')':
		return s.scanParenthesis()
	default:
		if isAtomRune(rune(first)) {
			return s.scanAtom()
		}
		return nil, ParserError(fmt.Sprintf("invalid character: %q", first))
	}
}