Mercurial > addons > firefox-addons > feed-preview
view js/feed-parser.js @ 7:2bbb7617dd13
Alternatively use Atom links without the rel attribute
author | Guido Berhoerster <guido+feed-preview@berhoerster.name> |
---|---|
date | Tue, 27 Nov 2018 10:47:26 +0100 |
parents | 5d7c13e998e9 |
children | 376a0e415bba |
line wrap: on
line source
/*
 * Copyright (C) 2018 Guido Berhoerster <guido+feed-preview@berhoerster.name>
 *
 * This Source Code Form is subject to the terms of the Mozilla Public
 * License, v. 2.0. If a copy of the MPL was not distributed with this
 * file, You can obtain one at http://mozilla.org/MPL/2.0/.
 */

'use strict';

/** XML namespace URIs of the namespaced feed formats handled here. */
export const XMLNS = {
    ATOM10: 'http://www.w3.org/2005/Atom',
    RSS09: 'http://my.netscape.com/rdf/simple/0.9/'
};

/** URL schemes that are accepted for any link extracted from a feed. */
const ALLOWED_LINK_PROTOCOLS = new Set(['http:', 'https:', 'ftp:']);

/** Replacement entity for every character that encodeXML() escapes. */
const XML_ENTITIES = {
    '<': '&lt;',
    '>': '&gt;',
    '&': '&amp;',
    '\'': '&apos;',
    '"': '&quot;'
};

/**
 * Escape the characters <, >, &, ' and " as XML entities.
 *
 * @param {string} str - raw text
 * @returns {string} text with the five special characters replaced
 */
function encodeXML(str) {
    return str.replace(/[<>&'"]/g, c => XML_ENTITIES[c]);
}

/**
 * Parse a date string, falling back to the epoch if it cannot be parsed.
 *
 * @param {string} s - date string as accepted by the Date constructor
 * @returns {Date} the parsed date or new Date(0) for unparsable input
 */
function parseDate(s) {
    const date = new Date(s);
    return Number.isNaN(date.getTime()) ? new Date(0) : date;
}

/**
 * Parse a possibly relative URL and restrict it to the allowed protocols.
 *
 * @param {string} text - URL text taken from the feed
 * @param {string|URL} [baseURL=''] - base against which text is resolved;
 *     an empty base is treated as "no base"
 * @returns {URL|null} the parsed URL, or null if text is not a string,
 *     cannot be parsed, or uses a protocol that is not allowed
 */
function parseURL(text, baseURL = '') {
    // a missing attribute yields null which the URL constructor would
    // otherwise stringify to "null" and resolve against the base
    if (typeof text !== 'string') {
        return null;
    }

    let url;
    try {
        // an empty string is not a valid base and would make even absolute
        // URLs fail, so only pass a base when there actually is one
        url = baseURL ? new URL(text, baseURL) : new URL(text);
    } catch (e) {
        return null;
    }

    return ALLOWED_LINK_PROTOCOLS.has(url.protocol) ? url : null;
}

/**
 * Namespace resolver for the XPath queries in this module.
 *
 * @param {string} prefix - namespace prefix used in a query
 * @returns {string|null} namespace URI for "atom" and "rss", otherwise null
 */
function feedNSResolver(prefix) {
    switch (prefix) {
        case 'atom':
            return XMLNS.ATOM10;
        case 'rss':
            return XMLNS.RSS09;
        default:
            return null;
    }
}

/**
 * Evaluate an XPath query and return the first matching node.
 *
 * @param {Document} feedDocument - document on which evaluate() is called
 * @param {Node} scopeElement - context node of the query
 * @param {string} xpathQuery - XPath expression
 * @returns {Node|null} the first node in document order or null
 */
function feedQueryXPath(feedDocument, scopeElement, xpathQuery) {
    return feedDocument.evaluate(xpathQuery, scopeElement, feedNSResolver,
        XPathResult.FIRST_ORDERED_NODE_TYPE, null).singleNodeValue;
}

/**
 * Evaluate an XPath query and return all matching nodes.
 *
 * @param {Document} feedDocument - document on which evaluate() is called
 * @param {Node} scopeElement - context node of the query
 * @param {string} xpathQuery - XPath expression
 * @returns {Node[]} matching nodes in the order the iterator yields them
 */
function feedQueryXPathAll(feedDocument, scopeElement, xpathQuery) {
    const result = feedDocument.evaluate(xpathQuery, scopeElement,
        feedNSResolver, XPathResult.ORDERED_NODE_ITERATOR_TYPE, null);
    const nodes = [];
    for (let node = result.iterateNext(); node !== null;
        node = result.iterateNext()) {
        nodes.push(node);
    }
    return nodes;
}

/** Thrown by FeedParser.parseFromString() when the XML is not well-formed. */
export class ParserError extends Error {
    constructor(...params) {
        super(...params);
        this.name = this.constructor.name;
    }
}

/** Thrown when a document is not recognized as a supported feed type. */
export class UnsupportedFeedTypeError extends Error {
    constructor(message = 'Document is not a supported feed', ...params) {
        super(message, ...params);
        this.name = this.constructor.name;
    }
}

/**
 * Error describing a failed transfer; exposes url, status and statusText
 * as properties in addition to the formatted message.
 */
export class ProtocolError extends Error {
    constructor(url, status, statusText, ...params) {
        const message = `Protocol error: Transfer of ${url} failed with: ` +
            `${status} ${statusText}`;
        super(message, ...params);
        this.name = this.constructor.name;
        this.url = url;
        this.status = status;
        this.statusText = statusText;
    }
}

/** Logo of a feed: an image URL with an optional title. */
class FeedLogo {
    constructor(url, {title = ''} = {}) {
        this.url = url;
        this.title = title;
    }
}

/**
 * File attached to a feed entry. Assigning url also derives filename from
 * the last path segment, using a localized default if that is empty.
 */
class FeedEntryFile {
    constructor(url, {type = browser.i18n.getMessage('defaultFileType'),
        size = 0} = {}) {
        this.filename = undefined;
        this._url = undefined;
        this.url = url;
        this.type = type;
        this.size = size;
    }

    set url(url) {
        this._url = url;
        const filename = url.pathname.split('/').pop();
        this.filename = filename !== '' ? filename :
            browser.i18n.getMessage('defaultFileName');
    }

    get url() {
        return this._url;
    }
}

/**
 * Single entry of a feed. Content assigned through the content property is
 * passed through normalizeContent() before it is stored.
 */
class FeedEntry {
    constructor({title = browser.i18n.getMessage('defaultFeedEntryTitle'),
        link = undefined, date = new Date(0), content = '',
        files = []} = {}) {
        this.title = title;
        this.link = link;
        this.date = date;
        this._content = undefined;
        this.content = content;
        this.files = files;
    }

    /**
     * Parse text as HTML and serialize the result as a complete document.
     *
     * @param {string|undefined} text - HTML markup
     * @returns {string|undefined} serialized document, or undefined if text
     *     is undefined
     */
    normalizeContent(text) {
        if (typeof text === 'undefined') {
            return;
        }
        const contentDocument = document.implementation.createHTMLDocument();
        const parsedDocument = new DOMParser().parseFromString(text,
            'text/html');
        contentDocument.body = contentDocument.adoptNode(parsedDocument.body);
        return new XMLSerializer().serializeToString(contentDocument);
    }

    set content(content) {
        this._content = this.normalizeContent(content);
    }

    get content() {
        return this._content;
    }
}

/** Parsed feed: metadata plus the list of entries. */
class Feed {
    constructor(url, {title = browser.i18n.getMessage('defaultFeedTitle'),
        subtitle = '', logo, entries = []} = {}) {
        this.url = url;
        this.title = title;
        this.subtitle = subtitle;
        this.logo = logo;
        this.entries = entries;
    }
}

/** Parser turning Atom 1.0 and RSS 0.9x/2.0 documents into Feed objects. */
export class FeedParser {
    /**
     * Determine the feed type and version from the document element.
     *
     * @param {Document} feedDocument - parsed XML document
     * @returns {Array} [type, version] with type "atom" or "rss", or
     *     [undefined, undefined] if the document is not recognized
     */
    static probeFeed(feedDocument) {
        const documentElement = feedDocument.documentElement;

        if (documentElement.nodeName === 'feed' &&
            documentElement.namespaceURI === XMLNS.ATOM10) {
            let version = documentElement.getAttribute('version');
            if (version === null) {
                // a feed element without version attribute is taken as 1.0
                version = '1.0';
            }
            if (version === '1.0') {
                return ['atom', version];
            }
        } else if (documentElement.nodeName === 'rss') {
            const version = documentElement.getAttribute('version');
            switch (version) {
                case '0.90':
                case '0.91':
                case '0.92':
                case '0.93':
                case '0.94':
                case '2.0':
                    return ['rss', version];
            }
        } else if (documentElement.localName.toLowerCase() === 'rdf' &&
            documentElement.getAttribute('xmlns') === XMLNS.RSS09) {
            return ['rss', '0.9'];
        }

        return [undefined, undefined];
    }

    constructor() {
        this.url = undefined;
        this.document = undefined;
    }

    /**
     * @param {Element} logoElement - atom:logo element
     * @returns {FeedLogo}
     * @throws {TypeError} if the element does not contain a usable URL
     */
    parseAtomLogo(logoElement) {
        const url = parseURL(logoElement.textContent.trim(), this.url);
        if (url === null) {
            throw new TypeError('invalid URL in <logo> element');
        }

        return new FeedLogo(url);
    }

    /**
     * @param {Element} entryElement - atom:entry element
     * @returns {FeedEntry}
     */
    parseAtomEntry(entryElement) {
        let title;
        let link;
        let date;
        let content;

        const titleElement = feedQueryXPath(this.document, entryElement,
            './atom:title');
        if (titleElement !== null) {
            title = titleElement.textContent.trim();
        }

        // links without rel attribute are accepted like rel="alternate"
        const linkElement = feedQueryXPath(this.document, entryElement,
            './atom:link[@href][not(@rel) or @rel="alternate"]');
        if (linkElement !== null) {
            link = parseURL(linkElement.getAttribute('href'), this.url);
        }

        const updatedElement = feedQueryXPath(this.document, entryElement,
            './atom:updated');
        if (updatedElement !== null) {
            date = parseDate(updatedElement.textContent);
        }

        // prefer the full content and fall back to the summary
        let contentElement = feedQueryXPath(this.document, entryElement,
            './atom:content');
        if (contentElement === null) {
            contentElement = feedQueryXPath(this.document, entryElement,
                './atom:summary');
        }
        if (contentElement !== null) {
            let contentType = contentElement.getAttribute('type');
            if (contentType === null) {
                contentType = 'text';
            }
            contentType = contentType.toLowerCase();
            if (contentType === 'xhtml') {
                content = contentElement.innerHTML;
            } else if (contentType === 'html') {
                content = contentElement.textContent;
            } else {
                // anything else is shown as escaped preformatted text
                const encodedContent =
                    encodeXML(contentElement.textContent.trim());
                content = `<pre>${encodedContent}</pre>`;
            }
        }

        return new FeedEntry({title, link, date, content});
    }

    /**
     * Parse this.document as an Atom feed.
     *
     * @returns {Feed}
     */
    parseAtomFeed() {
        let title;
        let subtitle;
        let logo;
        const entries = [];
        const documentElement = this.document.documentElement;

        const titleElement = feedQueryXPath(this.document, documentElement,
            './atom:title');
        if (titleElement !== null) {
            title = titleElement.textContent.trim();
        }

        const subtitleElement = feedQueryXPath(this.document, documentElement,
            './atom:subtitle');
        if (subtitleElement !== null) {
            subtitle = subtitleElement.textContent.trim();
        }

        const logoElement = feedQueryXPath(this.document, documentElement,
            './atom:logo');
        if (logoElement !== null) {
            try {
                logo = this.parseAtomLogo(logoElement);
            } catch (e) {
                // an unusable logo is ignored, anything else is propagated
                if (!(e instanceof TypeError)) {
                    throw e;
                }
            }
        }

        const entryElements = feedQueryXPathAll(this.document,
            documentElement, './atom:entry');
        for (const entryElement of entryElements) {
            entries.push(this.parseAtomEntry(entryElement));
        }

        return new Feed(this.url, {title, subtitle, logo, entries});
    }

    /**
     * @param {Element} imageElement - rss:image element
     * @returns {FeedLogo}
     * @throws {TypeError} if the url element is missing or unusable
     */
    parseRSS1Logo(imageElement) {
        let title;

        const urlElement = feedQueryXPath(this.document, imageElement,
            './rss:url');
        if (urlElement === null) {
            throw new TypeError('missing <url> element in <logo> element');
        }
        const url = parseURL(urlElement.textContent.trim(), this.url);
        if (url === null) {
            throw new TypeError('invalid URL in <logo> element');
        }

        const titleElement = feedQueryXPath(this.document, imageElement,
            './rss:title');
        if (titleElement !== null) {
            title = titleElement.textContent.trim();
        }

        return new FeedLogo(url, {title});
    }

    /**
     * @param {Element} itemElement - rss:item element
     * @returns {FeedEntry}
     */
    parseRSS1Entry(itemElement) {
        let title;
        let link;

        const titleElement = feedQueryXPath(this.document, itemElement,
            './rss:title');
        if (titleElement !== null) {
            title = titleElement.textContent;
        }

        const linkElement = feedQueryXPath(this.document, itemElement,
            './rss:link');
        if (linkElement !== null) {
            link = parseURL(linkElement.textContent, this.url);
        }

        return new FeedEntry({title, link});
    }

    /**
     * Parse this.document as an RSS 0.9 (RDF based) feed.
     *
     * @returns {Feed}
     */
    parseRSS1Feed() {
        let title;
        let subtitle;
        let logo;
        const entries = [];
        const documentElement = this.document.documentElement;

        const titleElement = feedQueryXPath(this.document, documentElement,
            './rss:channel/rss:title');
        if (titleElement !== null) {
            title = titleElement.textContent;
        }

        // the elements live in the RSS 0.9 namespace, an unprefixed XPath
        // name test would only match elements in no namespace
        const descriptionElement = feedQueryXPath(this.document,
            documentElement, './rss:channel/rss:description');
        if (descriptionElement !== null) {
            subtitle = descriptionElement.textContent;
        }

        const imageElement = feedQueryXPath(this.document, documentElement,
            './rss:image');
        if (imageElement !== null) {
            try {
                logo = this.parseRSS1Logo(imageElement);
            } catch (e) {
                // an unusable logo is ignored, anything else is propagated
                if (!(e instanceof TypeError)) {
                    throw e;
                }
            }
        }

        const itemElements = feedQueryXPathAll(this.document, documentElement,
            './rss:item');
        for (const itemElement of itemElements) {
            const entry = this.parseRSS1Entry(itemElement);
            if (typeof entry !== 'undefined') {
                entries.push(entry);
            }
        }

        return new Feed(this.url, {title, subtitle, logo, entries});
    }

    /**
     * @param {Element} imageElement - image element of an RSS channel
     * @returns {FeedLogo}
     * @throws {TypeError} if the url element is missing or unusable
     */
    parseRSS2Logo(imageElement) {
        let title;

        const urlElement = feedQueryXPath(this.document, imageElement,
            './url');
        if (urlElement === null) {
            throw new TypeError('missing <url> element in <logo> element');
        }
        const url = parseURL(urlElement.textContent.trim(), this.url);
        if (url === null) {
            throw new TypeError('invalid URL in <logo> element');
        }

        const titleElement = feedQueryXPath(this.document, imageElement,
            './title');
        if (titleElement !== null) {
            title = titleElement.textContent.trim();
        }

        return new FeedLogo(url, {title});
    }

    /**
     * @param {Element} enclosureElement - enclosure element of an item
     * @returns {FeedEntryFile}
     * @throws {TypeError} if the url attribute is missing or unusable
     */
    parseRSS2EntryFile(enclosureElement) {
        let type;
        let size;

        const url = parseURL(enclosureElement.getAttribute('url'), this.url);
        if (url === null) {
            throw new TypeError('invalid URL in <enclosure> element');
        }

        const typeAttribute = enclosureElement.getAttribute('type');
        if (typeAttribute !== null) {
            type = typeAttribute;
        }

        const length = Number.parseInt(
            enclosureElement.getAttribute('length'), 10);
        if (!Number.isNaN(length)) {
            size = length;
        }

        return new FeedEntryFile(url, {type, size});
    }

    /**
     * @param {Element} itemElement - item element of an RSS channel
     * @returns {FeedEntry}
     */
    parseRSS2Entry(itemElement) {
        let title;
        let link;
        let date;
        let content;
        const files = [];

        const titleElement = feedQueryXPath(this.document, itemElement,
            './title');
        if (titleElement !== null) {
            title = titleElement.textContent;
        }

        const linkElement = feedQueryXPath(this.document, itemElement,
            './link');
        if (linkElement !== null) {
            link = parseURL(linkElement.textContent, this.url);
        }

        const pubDateElement = feedQueryXPath(this.document, itemElement,
            './pubDate');
        if (pubDateElement !== null) {
            date = parseDate(pubDateElement.textContent);
        }

        const descriptionElement = feedQueryXPath(this.document, itemElement,
            './description');
        if (descriptionElement !== null) {
            content = descriptionElement.textContent.trim();
        }

        for (const enclosureElement of feedQueryXPathAll(this.document,
            itemElement, './enclosure')) {
            try {
                files.push(this.parseRSS2EntryFile(enclosureElement));
            } catch (e) {
                // unusable enclosures are skipped, anything else is
                // propagated
                if (!(e instanceof TypeError)) {
                    throw e;
                }
            }
        }

        return new FeedEntry({title, link, date, content, files});
    }

    /**
     * Parse this.document as an RSS 0.9x/2.0 feed.
     *
     * @returns {Feed}
     */
    parseRSS2Feed() {
        let title;
        let subtitle;
        let logo;
        const entries = [];
        const documentElement = this.document.documentElement;

        const titleElement = feedQueryXPath(this.document, documentElement,
            './channel/title');
        if (titleElement !== null) {
            title = titleElement.textContent;
        }

        const descriptionElement = feedQueryXPath(this.document,
            documentElement, './channel/description');
        if (descriptionElement !== null) {
            subtitle = descriptionElement.textContent;
        }

        const imageElement = feedQueryXPath(this.document, documentElement,
            './channel/image');
        if (imageElement !== null) {
            try {
                logo = this.parseRSS2Logo(imageElement);
            } catch (e) {
                // an unusable logo is ignored, anything else is propagated
                if (!(e instanceof TypeError)) {
                    throw e;
                }
            }
        }

        const itemElements = feedQueryXPathAll(this.document, documentElement,
            './channel/item');
        for (const itemElement of itemElements) {
            const entry = this.parseRSS2Entry(itemElement);
            if (typeof entry !== 'undefined') {
                entries.push(entry);
            }
        }

        return new Feed(this.url, {title, subtitle, logo, entries});
    }

    /**
     * Parse a feed from its XML source.
     *
     * @param {string} xmlString - XML source of the feed
     * @param {string|URL} url - URL of the feed, stored on the parser and
     *     used as base for links found in the feed
     * @returns {Feed}
     * @throws {ParserError} if the document element is a parsererror
     * @throws {UnsupportedFeedTypeError} if probeFeed() does not recognize
     *     the document
     */
    parseFromString(xmlString, url) {
        this.url = url;
        this.document = new DOMParser().parseFromString(xmlString,
            'application/xml');
        if (this.document.documentElement.nodeName.toLowerCase() ===
            'parsererror') {
            throw new ParserError(this.document.documentElement.textContent);
        }

        const [type, version] = this.constructor.probeFeed(this.document);
        if (type === 'atom') {
            return this.parseAtomFeed();
        } else if (type === 'rss') {
            if (version === '0.9') {
                return this.parseRSS1Feed();
            }
            return this.parseRSS2Feed();
        }

        throw new UnsupportedFeedTypeError();
    }
}