view js/feed-parser.js @ 22:38b20de704a0

Use separate grey icon for the page action. This does not stand out as much as the orange icon, especially when using dark themes, and should be readable on any theme since there is no transparency. Currently there is neither a way for add-ons to specify themed icons for page actions, nor is it possible to use the browser's foreground and background colors as native page action icons do.
author Guido Berhoerster <guido+feed-preview@berhoerster.name>
date Sun, 16 Dec 2018 10:22:19 +0100
parents 3fcd2209b39a
children da483ce3832d
line wrap: on
line source

/*
 * Copyright (C) 2018 Guido Berhoerster <guido+feed-preview@berhoerster.name>
 *
 * This Source Code Form is subject to the terms of the Mozilla Public
 * License, v. 2.0. If a copy of the MPL was not distributed with this
 * file, You can obtain one at http://mozilla.org/MPL/2.0/.
 */

'use strict';

/**
 * XML namespace URIs used to recognize and query feed documents.
 *
 * PARSERERROR is the namespace of the document element that is compared
 * against the result of DOMParser in order to detect a failed parse.
 *
 * The table is frozen so that importing modules cannot accidentally alter
 * the shared namespace constants.
 */
export const XMLNS = Object.freeze({
    ATOM03: 'http://purl.org/atom/ns#',
    ATOM10: 'http://www.w3.org/2005/Atom',
    RSS09: 'http://my.netscape.com/rdf/simple/0.9/',
    RSS10: 'http://purl.org/rss/1.0/',
    XHTML: 'http://www.w3.org/1999/xhtml',
    PARSERERROR: 'http://www.mozilla.org/newlayout/xml/parsererror.xml'
});

/*
 * URL schemes (in the form of URL.protocol, i.e. including the trailing
 * colon) which are accepted for links taken from a feed.
 */
const ALLOWED_LINK_PROTOCOLS = new Set(['http:', 'https:', 'ftp:']);

/**
 * Escape the five characters with a special meaning in XML markup.
 *
 * @param {string} str - text to escape
 * @returns {string} the text with <, >, &, ' and " replaced by the
 *         corresponding predefined entity references
 */
function encodeXML(str) {
    const entities = {
        '<': '&lt;',
        '>': '&gt;',
        '&': '&amp;',
        '\'': '&apos;',
        '"': '&quot;'
    };

    // the pattern only matches keys of the table above
    return str.replace(/[<>&'"]/g, match => entities[match]);
}

/**
 * Parse a date string, falling back to the epoch for unparsable input.
 *
 * @param {string} s - date string as understood by the Date constructor
 * @returns {Date} the parsed date or new Date(0) if s cannot be parsed
 */
function parseDate(s) {
    const parsed = new Date(s);
    if (Number.isNaN(parsed.getTime())) {
        return new Date(0);
    }

    return parsed;
}

/**
 * Parse a link taken from a feed and check that it uses an allowed scheme.
 *
 * @param {?string} text - absolute or relative URL, null or undefined
 *         (e.g. the value of a missing attribute) is treated as invalid
 * @param {string|URL} [baseURL] - base against which a relative URL is
 *         resolved; if it is omitted or empty text must be an absolute URL
 * @returns {?URL} the parsed URL or null if text is missing, cannot be
 *         parsed or does not use one of ALLOWED_LINK_PROTOCOLS
 */
function parseURL(text, baseURL = '') {
    // a missing value would otherwise be stringified and resolved as the
    // relative path "null" or "undefined"
    if (text === null || typeof text === 'undefined') {
        return null;
    }

    let url;
    try {
        // an empty or missing base must not be passed on since the URL
        // constructor rejects any invalid base, even for an absolute URL
        url = new URL(text, baseURL || undefined);
    } catch (e) {
        return null;
    }
    if (!ALLOWED_LINK_PROTOCOLS.has(url.protocol)) {
        return null;
    }

    return url;
}

/**
 * Decode a base64 string whose payload is text, decoded with the default
 * encoding of TextDecoder.
 *
 * @param {string} base64Str - base64-encoded data
 * @returns {string} the decoded text
 * @throws {TypeError} if atob() rejects the input with a DOMException
 */
function base64Decode(base64Str) {
    let binaryString;
    try {
        binaryString = atob(base64Str);
    } catch (err) {
        // callers only handle TypeError, so translate the DOMException
        if (err instanceof DOMException) {
            throw new TypeError(err.message);
        }
        throw err;
    }

    // atob() yields one character per byte, turn them back into bytes
    const bytes = Uint8Array.from(binaryString, ch => ch.charCodeAt(0));

    return new TextDecoder().decode(bytes);
}

/**
 * Namespace resolver for the XPath queries run against feed documents.
 *
 * @param {string} prefix - namespace prefix used in an XPath expression
 * @returns {?string} the namespace URI for the prefixes atom03, atom, rss09
 *         and rss10 or null for any other prefix
 */
function feedNSResolver(prefix) {
    if (prefix === 'atom03') {
        return XMLNS.ATOM03;
    }
    if (prefix === 'atom') {
        return XMLNS.ATOM10;
    }
    if (prefix === 'rss09') {
        return XMLNS.RSS09;
    }
    if (prefix === 'rss10') {
        return XMLNS.RSS10;
    }

    return null;
}

/**
 * Evaluate an XPath expression and return the first matching node.
 *
 * @param {Document} feedDocument - document used to evaluate the expression
 * @param {Node} scopeElement - context node of the expression
 * @param {string} xpathQuery - XPath expression, namespace prefixes are
 *         resolved by feedNSResolver
 * @returns {?Node} the single node value of the result
 */
function feedQueryXPath(feedDocument, scopeElement, xpathQuery) {
    const result = feedDocument.evaluate(xpathQuery, scopeElement,
            feedNSResolver, XPathResult.FIRST_ORDERED_NODE_TYPE, null);

    return result.singleNodeValue;
}

/**
 * Evaluate an XPath expression and collect all matching nodes.
 *
 * @param {Document} feedDocument - document used to evaluate the expression
 * @param {Node} scopeElement - context node of the expression
 * @param {string} xpathQuery - XPath expression, namespace prefixes are
 *         resolved by feedNSResolver
 * @returns {Node[]} the nodes in the order yielded by the result iterator,
 *         empty if nothing matches
 */
function feedQueryXPathAll(feedDocument, scopeElement, xpathQuery) {
    const iterator = feedDocument.evaluate(xpathQuery, scopeElement,
            feedNSResolver, XPathResult.ORDERED_NODE_ITERATOR_TYPE, null);
    const matches = [];
    let current;
    // iterateNext() signals the end of the result with null
    while ((current = iterator.iterateNext()) !== null) {
        matches.push(current);
    }

    return matches;
}

/**
 * Error indicating that a document could not be parsed as XML.
 */
export class ParserError extends Error {
    /**
     * @param {...*} params - arguments handed on unchanged to Error
     */
    constructor(...params) {
        super(...params);
        // report the name of the concrete class rather than "Error"
        this.name = this.constructor.name;
    }
}

/**
 * Error indicating that a document is not one of the supported feed
 * formats.
 */
export class UnsupportedFeedTypeError extends Error {
    /**
     * @param {string} [message] - error message, a generic one by default
     * @param {...*} rest - further arguments handed on to Error
     */
    constructor(message = 'Document is not a supported feed', ...rest) {
        super(message, ...rest);
        // report the name of the concrete class rather than "Error"
        this.name = this.constructor.name;
    }
}

/**
 * Error describing a failed transfer of a resource.
 */
export class ProtocolError extends Error {
    /**
     * @param {*} url - URL of the resource, interpolated into the message
     * @param {*} status - status of the failed transfer
     * @param {*} statusText - textual description of the status
     * @param {...*} params - further arguments handed on to Error
     */
    constructor(url, status, statusText, ...params) {
        super(`Protocol error: Transfer of ${url} failed with: ` +
                `${status} ${statusText}`, ...params);
        this.name = this.constructor.name;
        // keep the details available to handlers of the error
        Object.assign(this, {url, status, statusText});
    }
}

/**
 * Logo of a feed.
 */
class FeedLogo {
    /**
     * @param {*} url - location of the logo image
     * @param {Object} [options]
     * @param {string} [options.title=''] - title of the logo
     */
    constructor(url, options = {}) {
        const {title = ''} = options;

        this.url = url;
        this.title = title;
    }
}

/**
 * File attached to a feed entry.
 *
 * The filename property is derived from the URL and kept in sync with it
 * whenever the url property is assigned.
 */
class FeedEntryFile {
    /**
     * @param {URL} url - location of the file, its pathname is read
     * @param {Object} [options]
     * @param {string} [options.type] - type of the file, defaults to the
     *         localized message "defaultFileType"
     * @param {number} [options.size=0] - size of the file
     */
    constructor(url, {type = browser.i18n.getMessage('defaultFileType'),
            size = 0} = {}) {
        // both are set by the url setter below
        this.filename = undefined;
        this._url = undefined;

        this.url = url;
        this.type = type;
        this.size = size;
    }

    /**
     * The location of the file; assigning it also updates filename.
     */
    get url() {
        return this._url;
    }

    set url(url) {
        this._url = url;

        // the filename is the last path segment, it is empty if the path
        // ends with a slash
        const segments = url.pathname.split('/');
        const lastSegment = segments[segments.length - 1];
        if (lastSegment !== '') {
            this.filename = lastSegment;
        } else {
            this.filename = browser.i18n.getMessage('defaultFileName');
        }
    }
}

/**
 * Single entry of a feed.
 *
 * Content assigned to the content property is normalized, see
 * normalizeContent().
 */
class FeedEntry {
    /**
     * @param {Object} [properties]
     * @param {string} [properties.title] - title, defaults to the localized
     *         message "defaultFeedEntryTitle"
     * @param {*} [properties.link] - link to the entry
     * @param {Date} [properties.date] - date of the entry, the epoch by
     *         default
     * @param {string} [properties.content=''] - HTML content of the entry
     * @param {Array} [properties.files] - files attached to the entry
     */
    constructor(properties = {}) {
        const {
            title = browser.i18n.getMessage('defaultFeedEntryTitle'),
            link = undefined,
            date = new Date(0),
            content = '',
            files = []
        } = properties;

        this.title = title;
        this.link = link;
        this.date = date;
        // set by the content setter below
        this._content = undefined;
        this.content = content;
        this.files = files;
    }

    /**
     * Convert HTML markup into a serialized, complete HTML document.
     *
     * @param {string|undefined} text - HTML markup
     * @returns {string|undefined} the document serialized by XMLSerializer
     *         or undefined if text is undefined
     */
    normalizeContent(text) {
        if (text === undefined) {
            return undefined;
        }

        // move the parsed body into a fresh document and serialize that
        const sourceDocument = new DOMParser().parseFromString(text,
                'text/html');
        const targetDocument = document.implementation.createHTMLDocument();
        targetDocument.body = targetDocument.adoptNode(sourceDocument.body);

        return new XMLSerializer().serializeToString(targetDocument);
    }

    /**
     * The normalized content of the entry.
     */
    get content() {
        return this._content;
    }

    set content(content) {
        this._content = this.normalizeContent(content);
    }
}

/**
 * Parsed feed consisting of some metadata and a list of entries.
 */
class Feed {
    /**
     * @param {*} url - location of the feed
     * @param {Object} [properties]
     * @param {string} [properties.title] - title, defaults to the localized
     *         message "defaultFeedTitle"
     * @param {string} [properties.subtitle=''] - subtitle or description
     * @param {*} [properties.logo] - logo of the feed
     * @param {Array} [properties.entries] - entries of the feed
     */
    constructor(url, properties = {}) {
        const {
            title = browser.i18n.getMessage('defaultFeedTitle'),
            subtitle = '',
            logo,
            entries = []
        } = properties;

        Object.assign(this, {url, title, subtitle, logo, entries});
    }
}

/**
 * Parser turning the XML source of an Atom 0.3, Atom 1.0, RSS 0.9x/2.0 or
 * RDF-based RSS 0.9/1.0 feed into a Feed object.
 *
 * The entry point is parseFromString(), all other methods parse parts of the
 * document stored by it in the document property and resolve relative URLs
 * against the url property.
 *
 * Methods parsing an optional part of a feed signal malformed input by
 * throwing a TypeError, their callers catch it and skip that part.
 */
export class FeedParser {
    /**
     * Determine the format and version of a feed from its document element.
     *
     * @param {Document} feedDocument - parsed XML document
     * @returns {Array} a pair of type ('atom' or 'rss') and version string,
     *         both are undefined if the document is not recognized
     */
    static probeFeed(feedDocument) {
        let documentElement = feedDocument.documentElement;
        if (documentElement.nodeName === 'feed' &&
                documentElement.namespaceURI === XMLNS.ATOM03) {
            return ['atom', '0.3'];
        } else if (documentElement.nodeName === 'feed' &&
                documentElement.namespaceURI === XMLNS.ATOM10) {
            return ['atom', '1.0'];
        } else if (documentElement.nodeName === 'rss') {
            let version = documentElement.getAttribute('version');
            switch (version) {
                case '0.90':
                case '0.91':
                case '0.92':
                case '0.93':
                case '0.94':
                case '2.0':
                    return ['rss', version];
            }
        } else if (documentElement.localName.toLowerCase() === 'rdf' &&
                documentElement.getAttribute('xmlns') === XMLNS.RSS09) {
            return ['rss', '0.9'];
        } else if (documentElement.localName.toLowerCase() === 'rdf' &&
                documentElement.getAttribute('xmlns') === XMLNS.RSS10) {
            return ['rss', '1.0'];
        }

        return [undefined, undefined];
    }

    constructor() {
        // both are set by parseFromString()
        this.url = undefined;
        this.document = undefined;
    }

    /**
     * Parse an Atom 0.3 content construct, i.e. an element whose content is
     * described by its type and mode attributes.
     *
     * @param {Element} containerElement - element holding the content
     * @param {boolean} [textOnly=true] - return plain text rather than
     *         markup
     * @returns {string|undefined} the text or markup, undefined if the
     *         combination of type and mode is not supported or the content
     *         cannot be parsed
     * @throws {TypeError} if base64-encoded content cannot be decoded
     */
    parseAtom03ContentConstruct(containerElement, textOnly = true) {
        let contentType = containerElement.getAttribute('type');
        let contentMode = containerElement.getAttribute('mode');
        if (contentType === null) {
            contentType = 'text/plain';
        }
        if (contentMode === null) {
            contentMode = 'xml';
        }
        if (contentType === 'application/xhtml+xml') {
            let htmlText;
            if (contentMode === 'xml') {
                return textOnly ? containerElement.textContent.trim() :
                        containerElement.innerHTML;
            } else if (contentMode === 'escaped') {
                htmlText = containerElement.textContent;
            } else if (contentMode === 'base64') {
                htmlText = base64Decode(containerElement.textContent);
            }
            if (typeof htmlText === 'undefined') {
                return;
            }
            if (textOnly) {
                let htmlDocument = new DOMParser().parseFromString(htmlText,
                        'application/xhtml+xml');
                if (htmlDocument.documentElement.namespaceURI ===
                        XMLNS.PARSERERROR) {
                    return;
                }
                return htmlDocument.body.textContent.trim();
            }
            return htmlText;
        } else if (contentType === 'text/html') {
            let htmlText;
            if (contentMode === 'escaped') {
                htmlText = containerElement.textContent;
            } else if (contentMode === 'base64') {
                htmlText = base64Decode(containerElement.textContent);
            }
            if (typeof htmlText === 'undefined') {
                return;
            }
            if (textOnly) {
                let htmlDocument = new DOMParser().parseFromString(htmlText,
                        'text/html');
                return htmlDocument.body.textContent.trim();
            }
            return htmlText;
        } else if (contentType === 'text/plain') {
            let text;
            if (contentMode === 'xml') {
                // plain text given inline, this is what an element without
                // type and mode attributes defaults to
                text = containerElement.textContent.trim();
            } else if (contentMode === 'escaped') {
                text = containerElement.textContent;
            } else if (contentMode === 'base64') {
                text = base64Decode(containerElement.textContent);
            }
            if (typeof text === 'undefined') {
                return;
            }
            return textOnly ? text : `<pre>${encodeXML(text)}</pre>`;
        }
        return;
    }

    /**
     * Parse the content element of an Atom 0.3 entry as markup.
     *
     * For multipart/alternative content the alternative with the most
     * preferred supported type is selected.
     *
     * @param {Element} contentElement - content element of an entry
     * @returns {string|undefined} markup or undefined if the content is not
     *         supported
     * @throws {TypeError} if a multipart content element has a child which
     *         is not an Atom 0.3 content element or if decoding fails
     */
    parseAtom03Content(contentElement) {
        // ordered from lowest to highest preference
        const contentTypes = [
            'text/plain',
            'text/html',
            'application/xhtml+xml'
        ];
        if (contentElement.getAttribute('type') === 'multipart/alternative' &&
                contentElement.getAttribute('mode') === null) {
            // select alternative according to above preference
            let selectedTypeIndex = -1;
            let selectedElement;
            for (let innerContentElement of contentElement.children) {
                if (innerContentElement.localName !== 'content' ||
                        innerContentElement.namespaceURI !== XMLNS.ATOM03) {
                    throw new TypeError('child elements of a multipart ' +
                            'content element must be content elements');
                }
                let innerContentType = innerContentElement.getAttribute('type');
                if (innerContentType === null) {
                    innerContentType = 'text/plain';
                }
                // unsupported types yield -1 and are thus never selected
                let typeIndex = contentTypes.indexOf(innerContentType);
                if (typeIndex > selectedTypeIndex) {
                    selectedTypeIndex = typeIndex;
                    selectedElement = innerContentElement;
                }
            }
            if (selectedTypeIndex >= 0) {
                contentElement = selectedElement;
            }
        }

        return this.parseAtom03ContentConstruct(contentElement, false);
    }

    /**
     * Parse an entry of an Atom 0.3 feed.
     *
     * The content is taken from the content element, if that is missing or
     * unusable the summary element is used instead.
     *
     * @param {Element} entryElement - entry element
     * @returns {FeedEntry} the parsed entry
     */
    parseAtom03Entry(entryElement) {
        let title;
        let link;
        let date;
        let content;
        let titleElement = feedQueryXPath(this.document, entryElement,
                './atom03:title');
        if (titleElement !== null) {
            title = titleElement.textContent.trim();
        }

        let linkElement = feedQueryXPath(this.document, entryElement,
                './atom03:link[@href][@rel="alternate"]');
        if (linkElement !== null) {
            link = parseURL(linkElement.getAttribute('href'), this.url);
        }

        let modifiedElement = feedQueryXPath(this.document, entryElement,
                './atom03:modified');
        if (modifiedElement !== null) {
            date = parseDate(modifiedElement.textContent);
        }

        let contentElement = feedQueryXPath(this.document, entryElement,
                './atom03:content');
        if (contentElement !== null) {
            try {
                content = this.parseAtom03Content(contentElement);
            } catch (e) {
                // malformed content is skipped
                if (!(e instanceof TypeError)) {
                    throw e;
                }
            }
        }
        if (typeof content === 'undefined') {
            let summaryElement = feedQueryXPath(this.document, entryElement,
                    './atom03:summary');
            if (summaryElement !== null) {
                try {
                    content = this.parseAtom03ContentConstruct(summaryElement,
                            false);
                } catch (e) {
                    // a malformed summary is skipped
                    if (!(e instanceof TypeError)) {
                        throw e;
                    }
                }
            }
        }

        return new FeedEntry({title, link, date, content});
    }

    /**
     * Parse the current document as an Atom 0.3 feed.
     *
     * @returns {Feed} the parsed feed
     */
    parseAtom03Feed() {
        let title;
        let subtitle;
        let logo;
        let entries = [];
        let documentElement = this.document.documentElement;

        let titleElement = feedQueryXPath(this.document, documentElement,
                './atom03:title');
        if (titleElement !== null) {
            try {
                title = this.parseAtom03ContentConstruct(titleElement);
            } catch (e) {
                if (!(e instanceof TypeError)) {
                    throw e;
                }
            }
        }

        // the tagline is the subtitle of the feed, it must not replace the
        // title parsed above
        let taglineElement = feedQueryXPath(this.document, documentElement,
                './atom03:tagline');
        if (taglineElement !== null) {
            try {
                subtitle = this.parseAtom03ContentConstruct(taglineElement);
            } catch (e) {
                if (!(e instanceof TypeError)) {
                    throw e;
                }
            }
        }

        let logoElement = feedQueryXPath(this.document, documentElement,
                './atom03:logo');
        if (logoElement !== null) {
            try {
                logo = this.parseAtomLogo(logoElement);
            } catch (e) {
                if (!(e instanceof TypeError)) {
                    throw e;
                }
            }
        }

        let entryElements = feedQueryXPathAll(this.document, documentElement,
                './atom03:entry');
        for (let entryElement of entryElements) {
            entries.push(this.parseAtom03Entry(entryElement));
        }

        return new Feed(this.url, {title, subtitle, logo, entries});
    }

    /**
     * Parse the logo element of an Atom feed.
     *
     * @param {Element} logoElement - element whose text is the logo URL
     * @returns {FeedLogo} the logo
     * @throws {TypeError} if the element does not contain a valid URL
     */
    parseAtomLogo(logoElement) {
        let url = parseURL(logoElement.textContent.trim(), this.url);
        if (url === null) {
            throw new TypeError('invalid URL in <logo> element');
        }
        return new FeedLogo(url);
    }

    /**
     * Parse an Atom 1.0 text construct, i.e. an element whose content is
     * described by its type attribute ("text" by default).
     *
     * @param {Element} containerElement - element holding the text
     * @param {boolean} [textOnly=true] - return plain text rather than
     *         markup
     * @returns {string|undefined} the text or markup, undefined if the type
     *         is not supported or xhtml content lacks the XHTML div element
     */
    parseAtomTextConstruct(containerElement, textOnly = true) {
        let contentType = containerElement.getAttribute('type');
        if (contentType === null) {
            contentType = 'text';
        }

        if (contentType === 'xhtml') {
            // the actual content is wrapped in a single XHTML div element
            let xhtmlRootElement = containerElement.firstElementChild;
            if (xhtmlRootElement !== null &&
                    xhtmlRootElement.localName === 'div' &&
                    xhtmlRootElement.namespaceURI === XMLNS.XHTML) {
                return textOnly ? xhtmlRootElement.textContent.trim() :
                        xhtmlRootElement.innerHTML;
            }
        } else if (contentType === 'html') {
            let htmlText = containerElement.textContent;
            if (textOnly) {
                let htmlDocument = new DOMParser().parseFromString(htmlText,
                        'text/html');
                return htmlDocument.body.textContent.trim();
            }
            return htmlText;
        } else if (contentType === 'text') {
            let text = containerElement.textContent.trim();
            return textOnly ? text : `<pre>${encodeXML(text)}</pre>`;
        }

        // unsupported content type
        return;
    }

    /**
     * Parse the content element of an Atom 1.0 entry as markup.
     *
     * @param {Element} contentElement - content element of an entry
     * @returns {string|undefined} markup or undefined if the content is
     *         referenced through a src attribute or not supported
     */
    parseAtomContent(contentElement) {
        let contentSrc = contentElement.getAttribute('src');
        if (contentSrc !== null) {
            // externally referenced content is not supported
            return;
        }
        return this.parseAtomTextConstruct(contentElement, false);
    }

    /**
     * Parse an enclosure link of an Atom 1.0 entry.
     *
     * @param {Element} enclosureElement - link element, its href, type and
     *         length attributes are read
     * @returns {FeedEntryFile} the attached file
     * @throws {TypeError} if the href attribute is not a valid URL
     */
    parseAtomEntryFile(enclosureElement) {
        let type;
        let size;
        let url = parseURL(enclosureElement.getAttribute('href'), this.url);
        if (url === null) {
            throw new TypeError('invalid URL in enclosure href attribute');
        }

        let typeAttribute = enclosureElement.getAttribute('type');
        if (typeAttribute !== null) {
            type = typeAttribute;
        }

        // a missing or non-numeric length leaves the size undefined
        let length = parseInt(enclosureElement.getAttribute('length'), 10);
        if (!isNaN(length)) {
            size = length;
        }

        return new FeedEntryFile(url, {type, size});
    }

    /**
     * Parse an entry of an Atom 1.0 feed.
     *
     * The content is taken from the content element, if that is missing or
     * unusable the summary element is used instead.
     *
     * @param {Element} entryElement - entry element
     * @returns {FeedEntry} the parsed entry
     */
    parseAtomEntry(entryElement) {
        let title;
        let link;
        let date;
        let content;
        let files = [];
        let titleElement = feedQueryXPath(this.document, entryElement,
                './atom:title');
        if (titleElement !== null) {
            title = this.parseAtomTextConstruct(titleElement);
        }

        let linkElement = feedQueryXPath(this.document, entryElement,
                './atom:link[@href][not(@rel) or @rel="alternate"]');
        if (linkElement !== null) {
            link = parseURL(linkElement.getAttribute('href'), this.url);
        }

        let updatedElement = feedQueryXPath(this.document, entryElement,
                './atom:updated');
        if (updatedElement !== null) {
            date = parseDate(updatedElement.textContent);
        }

        let contentElement = feedQueryXPath(this.document, entryElement,
                './atom:content');
        if (contentElement !== null) {
            content = this.parseAtomContent(contentElement);
        }
        if (typeof content === 'undefined') {
            let summaryElement = feedQueryXPath(this.document, entryElement,
                    './atom:summary');
            if (summaryElement !== null) {
                content = this.parseAtomTextConstruct(summaryElement, false);
            }
        }

        for (let enclosureElement of feedQueryXPathAll(this.document,
                entryElement, './atom:link[@href][@rel="enclosure"]')) {
            try {
                let entryFile = this.parseAtomEntryFile(enclosureElement);
                files.push(entryFile);
            } catch (e) {
                // enclosures with an invalid URL are skipped
                if (!(e instanceof TypeError)) {
                    throw e;
                }
            }
        }

        return new FeedEntry({title, link, date, content, files});
    }

    /**
     * Parse the current document as an Atom 1.0 feed.
     *
     * @returns {Feed} the parsed feed
     */
    parseAtomFeed() {
        let title;
        let subtitle;
        let logo;
        let entries = [];
        let documentElement = this.document.documentElement;

        let titleElement = feedQueryXPath(this.document, documentElement,
                './atom:title');
        if (titleElement !== null) {
            title = this.parseAtomTextConstruct(titleElement);
        }

        let subtitleElement = feedQueryXPath(this.document, documentElement,
                './atom:subtitle');
        if (subtitleElement !== null) {
            subtitle = this.parseAtomTextConstruct(subtitleElement);
        }

        let logoElement = feedQueryXPath(this.document, documentElement,
                './atom:logo');
        if (logoElement !== null) {
            try {
                logo = this.parseAtomLogo(logoElement);
            } catch (e) {
                if (!(e instanceof TypeError)) {
                    throw e;
                }
            }
        }

        let entryElements = feedQueryXPathAll(this.document, documentElement,
                './atom:entry');
        for (let entryElement of entryElements) {
            entries.push(this.parseAtomEntry(entryElement));
        }

        return new Feed(this.url, {title, subtitle, logo, entries});
    }

    /**
     * Parse the image element of an RDF-based RSS feed.
     *
     * @param {Element} imageElement - image element
     * @param {string} nsPrefix - namespace prefix of the feed version as
     *         understood by feedNSResolver ('rss09' or 'rss10')
     * @returns {FeedLogo} the logo
     * @throws {TypeError} if the url element is missing or invalid
     */
    parseRSS1Logo(imageElement, nsPrefix) {
        let title;
        let urlElement = feedQueryXPath(this.document, imageElement,
                `./${nsPrefix}:url`);
        if (urlElement === null) {
            throw new TypeError('missing <url> element in <logo> element');
        }
        let url = parseURL(urlElement.textContent.trim(), this.url);
        if (url === null) {
            throw new TypeError('invalid URL in <logo> element');
        }

        let titleElement = feedQueryXPath(this.document, imageElement,
                `./${nsPrefix}:title`);
        if (titleElement !== null) {
            title = titleElement.textContent.trim();
        }

        return new FeedLogo(url, {title});
    }

    /**
     * Parse an item of an RDF-based RSS feed, only title and link are read.
     *
     * @param {Element} itemElement - item element
     * @param {string} nsPrefix - namespace prefix of the feed version
     * @returns {FeedEntry} the parsed entry
     */
    parseRSS1Entry(itemElement, nsPrefix) {
        let title;
        let link;
        let titleElement = feedQueryXPath(this.document, itemElement,
                `./${nsPrefix}:title`);
        if (titleElement !== null) {
            title = titleElement.textContent;
        }

        let linkElement = feedQueryXPath(this.document, itemElement,
                `./${nsPrefix}:link`);
        if (linkElement !== null) {
            link = parseURL(linkElement.textContent, this.url);
        }

        return new FeedEntry({title, link});
    }

    /**
     * Parse the current document as an RDF-based RSS feed.
     *
     * @param {string} version - '0.9' selects the RSS 0.9 namespace, any
     *         other value the RSS 1.0 namespace
     * @returns {Feed} the parsed feed
     */
    parseRSS1Feed(version) {
        let nsPrefix = version === '0.9' ? 'rss09' : 'rss10';
        let title;
        let subtitle;
        let logo;
        let entries = [];
        let documentElement = this.document.documentElement;
        let titleElement = feedQueryXPath(this.document, documentElement,
                `./${nsPrefix}:channel/${nsPrefix}:title`);
        if (titleElement !== null) {
            title = titleElement.textContent;
        }

        let descriptionElement = feedQueryXPath(this.document, documentElement,
                `./${nsPrefix}:channel/${nsPrefix}:description`);
        if (descriptionElement !== null) {
            subtitle = descriptionElement.textContent;
        }

        // image and item elements are children of the document element
        // rather than of the channel element
        let imageElement = feedQueryXPath(this.document, documentElement,
                `./${nsPrefix}:image`);
        if (imageElement !== null) {
            try {
                logo = this.parseRSS1Logo(imageElement, nsPrefix);
            } catch (e) {
                if (!(e instanceof TypeError)) {
                    throw e;
                }
            }
        }

        let itemElements = feedQueryXPathAll(this.document, documentElement,
                `./${nsPrefix}:item`);
        for (let itemElement of itemElements) {
            let entry = this.parseRSS1Entry(itemElement, nsPrefix);
            if (typeof entry !== 'undefined') {
                entries.push(entry);
            }
        }

        return new Feed(this.url, {title, subtitle, logo, entries});
    }

    /**
     * Parse the image element of an RSS 0.9x/2.0 channel.
     *
     * @param {Element} imageElement - image element
     * @returns {FeedLogo} the logo
     * @throws {TypeError} if the url element is missing or invalid
     */
    parseRSS2Logo(imageElement) {
        let title;
        let urlElement = feedQueryXPath(this.document, imageElement, './url');
        if (urlElement === null) {
            throw new TypeError('missing <url> element in <logo> element');
        }
        let url = parseURL(urlElement.textContent.trim(), this.url);
        if (url === null) {
            throw new TypeError('invalid URL in <logo> element');
        }

        let titleElement = feedQueryXPath(this.document, imageElement,
                './title');
        if (titleElement !== null) {
            title = titleElement.textContent.trim();
        }

        return new FeedLogo(url, {title});
    }

    /**
     * Parse an enclosure element of an RSS 0.9x/2.0 item.
     *
     * @param {Element} enclosureElement - enclosure element, its url, type
     *         and length attributes are read
     * @returns {FeedEntryFile} the attached file
     * @throws {TypeError} if the url attribute is missing or not a valid
     *         URL
     */
    parseRSS2EntryFile(enclosureElement) {
        let type;
        let size;
        // a missing attribute must be rejected here, when passed on as null
        // it may get resolved as the relative URL "null"
        let urlAttribute = enclosureElement.getAttribute('url');
        if (urlAttribute === null) {
            throw new TypeError('missing url attribute in <enclosure> ' +
                    'element');
        }
        let url = parseURL(urlAttribute, this.url);
        if (url === null) {
            throw new TypeError('invalid URL in <enclosure> element');
        }

        let typeAttribute = enclosureElement.getAttribute('type');
        if (typeAttribute !== null) {
            type = typeAttribute;
        }

        // a missing or non-numeric length leaves the size undefined
        let length = parseInt(enclosureElement.getAttribute('length'),
                10);
        if (!isNaN(length)) {
            size = length;
        }

        return new FeedEntryFile(url, {type, size});
    }

    /**
     * Parse an item of an RSS 0.9x/2.0 feed.
     *
     * @param {Element} itemElement - item element
     * @returns {FeedEntry} the parsed entry
     */
    parseRSS2Entry(itemElement) {
        let title;
        let link;
        let date;
        let content;
        let files = [];
        let titleElement = feedQueryXPath(this.document, itemElement,
                './title');
        if (titleElement !== null) {
            title = titleElement.textContent;
        }

        let linkElement = feedQueryXPath(this.document, itemElement, './link');
        if (linkElement !== null) {
            link = parseURL(linkElement.textContent, this.url);
        }

        let pubDateElement = feedQueryXPath(this.document, itemElement,
                './pubDate');
        if (pubDateElement !== null) {
            date = parseDate(pubDateElement.textContent);
        }

        let descriptionElement = feedQueryXPath(this.document, itemElement,
                './description');
        if (descriptionElement !== null) {
            content = descriptionElement.textContent.trim();
        }

        for (let enclosureElement of
                feedQueryXPathAll(this.document, itemElement, './enclosure')) {
            try {
                let entryFile = this.parseRSS2EntryFile(enclosureElement);
                files.push(entryFile);
            } catch (e) {
                // enclosures without a valid URL are skipped
                if (!(e instanceof TypeError)) {
                    throw e;
                }
            }
        }

        return new FeedEntry({title, link, date, content, files});
    }

    /**
     * Parse the current document as an RSS 0.9x/2.0 feed.
     *
     * @returns {Feed} the parsed feed
     */
    parseRSS2Feed() {
        let title;
        let subtitle;
        let logo;
        let entries = [];
        let documentElement = this.document.documentElement;
        let titleElement = feedQueryXPath(this.document, documentElement,
                './channel/title');
        if (titleElement !== null) {
            title = titleElement.textContent;
        }

        let descriptionElement = feedQueryXPath(this.document, documentElement,
                './channel/description');
        if (descriptionElement !== null) {
            subtitle = descriptionElement.textContent;
        }

        let imageElement = feedQueryXPath(this.document, documentElement,
                './channel/image');
        if (imageElement !== null) {
            try {
                logo = this.parseRSS2Logo(imageElement);
            } catch (e) {
                if (!(e instanceof TypeError)) {
                    throw e;
                }
            }
        }

        let itemElements = feedQueryXPathAll(this.document, documentElement,
                './channel/item');
        for (let itemElement of itemElements) {
            let entry = this.parseRSS2Entry(itemElement);
            if (typeof entry !== 'undefined') {
                entries.push(entry);
            }
        }

        return new Feed(this.url, {title, subtitle, logo, entries});
    }

    /**
     * Parse the XML source of a feed.
     *
     * @param {string} xmlString - XML source of the feed
     * @param {*} url - location of the feed, stored in the resulting Feed
     *         and used as the base of relative URLs
     * @returns {Feed} the parsed feed
     * @throws {ParserError} if xmlString is not well-formed XML
     * @throws {UnsupportedFeedTypeError} if the document is not a supported
     *         feed
     */
    parseFromString(xmlString, url) {
        this.url = url;
        this.document = new DOMParser().parseFromString(xmlString,
                'application/xml');
        // a failed parse yields an error document instead of an exception
        if (this.document.documentElement.namespaceURI === XMLNS.PARSERERROR) {
            throw new ParserError(this.document.documentElement.textContent);
        }

        let [type, version] = this.constructor.probeFeed(this.document);
        if (type === 'atom') {
            if (version === '0.3') {
                return this.parseAtom03Feed();
            } else if (version === '1.0') {
                return this.parseAtomFeed();
            }
        } else if (type === 'rss') {
            if (version === '0.9' || version === '1.0') {
                return this.parseRSS1Feed(version);
            } else {
                return this.parseRSS2Feed();
            }
        }
        throw new UnsupportedFeedTypeError();
    }
}