view js/background.js @ 14:376a0e415bba

Properly handle non-text content in Atom feed elements. The title, subtitle, summary and content elements of Atom feeds can all have non-text content. When parsing title and subtitle elements, HTML and XHTML content will be stripped of any markup in order to keep it simple. In summary and content elements markup will be preserved. Element content of any other type as well as remote content in content elements will be ignored.
author Guido Berhoerster <guido+feed-preview@berhoerster.name>
date Mon, 10 Dec 2018 16:38:11 +0100
parents a4590add4901
children a59d322e5826
line wrap: on
line source

/*
 * Copyright (C) 2018 Guido Berhoerster <guido+feed-preview@berhoerster.name>
 *
 * This Source Code Form is subject to the terms of the Mozilla Public
 * License, v. 2.0. If a copy of the MPL was not distributed with this
 * file, You can obtain one at http://mozilla.org/MPL/2.0/.
 */

'use strict';

import * as feedParser from './feed-parser.js';
import {renderFeedPreview} from './feed-preview.js';

// Feed readers shipped with the extension. Each entry has a display title
// and a subscription URL template containing a "%s" placeholder (presumably
// substituted with the feed URL by whatever consumes the stored list --
// confirm against that code). The onInstalled listener in this file merges
// these entries into the "feedReaders" list kept in browser.storage.sync,
// using urlTemplate as the key for detecting duplicates.
const FEED_READERS_PRESET = [
    {
        title: 'Feedly',
        urlTemplate: 'https://feedly.com/#subscription/feed/%s'
    },
    {
        title: 'FlowReader',
        urlTemplate: 'https://www.flowreader.com/subscribe?url=%s'
    },
    {
        title: 'InoReader',
        urlTemplate: 'https://www.inoreader.com/feed/%s'
    },
    {
        title: 'Kouio',
        urlTemplate: 'https://kouio.com/subscribe?url=%s'
    },
    {
        title: 'My Yahoo',
        urlTemplate: 'https://add.my.yahoo.com/rss?url=%s'
    },
    {
        title: 'Netvibes',
        urlTemplate: 'https://www.netvibes.com/subscribe.php?url=%s'
    },
    {
        title: 'NewsBlur',
        urlTemplate: 'https://www.newsblur.com/?url=%s'
    },
    {
        title: 'The Old Reader',
        urlTemplate: 'https://theoldreader.com/feeds/subscribe?url=%s'
    }
];
// Substrings used by handleFeed() as a cheap pre-filter: a document is only
// handed to the feed parser if at least one of these occurs within its first
// 512 characters. The list holds the "<rss" and "<feed" tag prefixes plus the
// Atom 0.3, Atom 1.0, RSS 0.9 and RSS 1.0 values exported by feedParser.XMLNS.
const FEED_MAGIC = [
    '<rss',
    '<feed',
    feedParser.XMLNS.ATOM03,
    feedParser.XMLNS.ATOM10,
    feedParser.XMLNS.RSS09,
    feedParser.XMLNS.RSS10
];
// tab ID -> message payload received from the content script running in that
// tab (the feeds it reported); handed back to the popup when it queries by
// tab ID
var tabsFeeds = new Map();
// tab ID -> URL of a response that handleFeed() turned into a feed preview;
// consumed by the tabs.onUpdated listener to inject the content script once
var tabsFeedPreviews = new Map();
// promise for the text of the feed preview template document, requested once
// when this script is loaded and awaited for every rendered preview
var fetchingFeedPreview = fetch('web_resources/feed-preview.xhtml')
        .then(response => response.text());

/**
 * Parse the value of a Content-Type header into its media type and charset.
 *
 * The header value is lower-cased as a whole before it is split on ";", so
 * both returned fields are lower case. Parameters which are not of the form
 * name=value or name="value" (for example the empty string left behind by a
 * trailing semicolon, or a bare token) are skipped rather than treated as
 * an error. Only the first charset parameter is used.
 *
 * @param {string} header - raw Content-Type header value
 * @returns {{mediaType: string, charset: string}} the media type and the
 *     charset; charset is 'utf-8' if the header has no charset parameter
 */
function parseContentType(header) {
    const [mediaType, ...parameters] = header.toLowerCase().split(';');
    const contentType = {
        mediaType: mediaType.trim(),
        charset: 'utf-8'
    };

    for (const parameter of parameters) {
        // optional double quotes around the value are not part of the value
        const match = /^([^=]+)="?([^"]*)"?/.exec(parameter.trim());
        if (match === null) {
            // not a name=value pair, nothing to extract
            continue;
        }

        const [, name, value] = match;
        // tolerate stray whitespace around the "="
        if (name.trim() === 'charset') {
            contentType.charset = value.trim();
            break;
        }
    }

    return contentType;
}

/**
 * Turn a decoded XML response body into a feed preview document if it is a
 * supported Atom or RSS feed.
 *
 * @param {string} inputText - decoded text of the response body
 * @param {number} tabId - ID of the tab the response is loaded into
 * @param {string} url - URL of the response
 * @returns {Promise<string>} the serialized preview document, or inputText
 *     unchanged if the document is not a feed the parser supports
 */
async function handleFeed(inputText, tabId, url) {
    // cheap check first: a document without any feed marker near its start
    // can be neither an Atom nor an RSS feed and need not be parsed at all
    const head = inputText.substring(0, 512);
    const mayBeFeed = FEED_MAGIC.some(magic => head.includes(magic));
    if (!mayBeFeed) {
        return inputText;
    }

    let parsedFeed;
    try {
        const parser = new feedParser.FeedParser();
        parsedFeed = parser.parseFromString(inputText, url);
    } catch (e) {
        const isExpectedFailure = e instanceof feedParser.ParserError ||
                e instanceof feedParser.UnsupportedFeedTypeError;
        if (!isExpectedFailure) {
            throw e;
        }
        // XML which is not well-formed or which is not a supported Atom or
        // RSS feed is left for the browser to deal with
        return inputText;
    }
    console.log(`parsed feed ${url}:\n`, parsedFeed);

    // remember that this tab will show a feed preview so that the content
    // script gets injected into it
    tabsFeedPreviews.set(tabId, url);

    // fill the preview template with the feed and hand back its markup
    const templateText = await fetchingFeedPreview;
    const previewDocument = new DOMParser()
            .parseFromString(templateText, 'text/html');
    renderFeedPreview(previewDocument, parsedFeed);

    const serializer = new XMLSerializer();
    return serializer.serializeToString(previewDocument);
}

// Intercept successful main frame responses which carry an XML media type.
// The response body is buffered, decoded using the charset from the
// Content-Type header and passed to handleFeed(), which either returns a
// rendered feed preview or the decoded document unchanged. The result is
// written back UTF-8 encoded and the Content-Type header is rewritten to
// match. Responses which are not candidates are left untouched by returning
// an empty object.
browser.webRequest.onHeadersReceived.addListener(details => {
    if (details.statusCode !== 200) {
        return {};
    }

    let contentTypeIndex = details.responseHeaders.findIndex(header =>
            header.name.toLowerCase() === 'content-type' &&
            typeof header.value !== 'undefined');
    if (contentTypeIndex < 0) {
        // no Content-Type header found
        return {};
    }
    let headerValue = details.responseHeaders[contentTypeIndex].value;
    let contentType = parseContentType(headerValue);
    // until content handlers become available to webextensions
    // (https://bugzilla.mozilla.org/show_bug.cgi?id=1457500) intercept all
    // responses and change the content type from application/atom+xml or
    // application/rss+xml to application/xml which will then be probed for
    // Atom or RSS content
    switch (contentType.mediaType) {
        case 'application/atom+xml':
        case 'application/rss+xml':
        case 'application/rdf+xml':
        case 'application/xml':
            break;
        default:
            // non-XML media type
            return {};
    }
    console.log(`response is an XML document\n`,
            `media type: ${contentType.mediaType}\n`,
            `charset: ${contentType.charset}`);

    let decoder;
    try {
        decoder = new TextDecoder(contentType.charset);
    } catch (e) {
        if (e instanceof RangeError) {
            // unsupported charset
            return {};
        } else {
            throw e;
        }
    }
    let encoder = new TextEncoder();
    let inputText = '';
    let filter = browser.webRequest.filterResponseData(details.requestId);
    filter.addEventListener('data', ev => {
        inputText += decoder.decode(ev.data, {stream: true});
    });
    filter.addEventListener('stop', async ev => {
        // end of stream: flush whatever the streaming decoder still holds
        // back from an incomplete trailing byte sequence
        inputText += decoder.decode();

        let result = inputText;
        try {
            result = await handleFeed(inputText, details.tabId, details.url);
        } catch (e) {
            // fall back to the unmodified document rather than leaving the
            // request hanging without a body
            console.error(`failed to preview feed ${details.url}:\n`, e);
            // no preview is shown, so the content script must not be
            // injected for this response
            if (tabsFeedPreviews.get(details.tabId) === details.url) {
                tabsFeedPreviews.delete(details.tabId);
            }
        } finally {
            filter.write(encoder.encode(result));
            filter.close();
        }
    });

    // TextEncoder always produces UTF-8, so the body written to the filter
    // is UTF-8 regardless of the charset the server declared
    details.responseHeaders[contentTypeIndex] = {
        name: 'Content-Type',
        value: 'application/xml;charset=utf-8'
    };

    return {responseHeaders: details.responseHeaders};
},
        {urls: ['http://*/*', 'https://*/*'], types: ['main_frame']},
        ['blocking', 'responseHeaders']);

// Messages reach the background script from two kinds of senders: content
// scripts, which have a tab attached and report the feeds of their page, and
// the popup, which has no tab and asks for the feeds of a given tab ID.
browser.runtime.onMessage.addListener((request, sender, sendResponse) => {
    const {tab} = sender;

    if (typeof tab === 'undefined') {
        // query from the popup, the request is the ID of the tab of interest
        sendResponse(tabsFeeds.get(request));
        return;
    }

    // report from a content script, remember the feeds and offer the page
    // action for its tab
    tabsFeeds.set(tab.id, request);
    browser.pageAction.show(tab.id);
});

// React to URL changes of a tab: retract the page action and, if the new URL
// belongs to a response which was turned into a feed preview, inject the
// content script into it.
browser.tabs.onUpdated.addListener((tabId, changeInfo, tab) => {
    const newUrl = changeInfo.url;
    // only updates which change the URL are of interest
    if (typeof newUrl !== 'undefined') {
        // the page action refers to the previous URL and is thus stale, the
        // content script brings it back if it finds a feed
        browser.pageAction.hide(tabId);

        // a preview is marked per tab and gets its content script exactly
        // once, hence the mark is dropped right after the injection
        const isFeedPreview = tabsFeedPreviews.get(tabId) === newUrl;
        if (isFeedPreview) {
            browser.tabs.executeScript(tabId, {
                file: 'content_scripts/feed-readers.js'
            });
            tabsFeedPreviews.delete(tabId);
        }
    }
});

// Drop everything that was recorded for a tab once that tab is closed.
browser.tabs.onRemoved.addListener((tabId, removeInfo) => {
    for (const perTabState of [tabsFeeds, tabsFeedPreviews]) {
        perTabState.delete(tabId);
    }
});

// On a fresh install, or on an update from a version older than 2, add the
// preset feed readers to the list stored in browser.storage.sync. Readers
// which are already stored (identified by their URL template) are kept as
// they are and not added a second time.
browser.runtime.onInstalled.addListener(async details => {
    // previousVersion is a version string; only its leading integer
    // component is compared since coercing a dotted string such as "1.2.3"
    // to a number as a whole yields NaN, which never compares less than 2
    const previousMajor = Number.parseInt(details.previousVersion, 10);
    const isInstall = details.reason === 'install';
    const isUpdateFromOldVersion = details.reason === 'update' &&
            previousMajor < 2;
    if (!isInstall && !isUpdateFromOldVersion) {
        return;
    }

    let {feedReaders = []} = await browser.storage.sync.get('feedReaders');
    let feedReadersSet =
            new Set(feedReaders.map(feedReader => feedReader.urlTemplate));
    for (let feedReader of FEED_READERS_PRESET) {
        if (!feedReadersSet.has(feedReader.urlTemplate)) {
            feedReaders.push(feedReader);
        }
    }
    console.log('set feedReaders to', feedReaders);
    // wait for the write so that a failure rejects this listener's promise
    // instead of a detached one
    await browser.storage.sync.set({feedReaders});
});