# HG changeset patch
# User Guido Berhoerster
# Date 1544480676 -3600
# Node ID 150f07c7595f4ba614c841681091fe9d8625da2d
# Parent  376a0e415bbac79f4d2d0ceb08080c476e718be0
Add support for Atom 0.3 feeds

diff -r 376a0e415bba -r 150f07c7595f js/feed-parser.js
--- a/js/feed-parser.js	Mon Dec 10 16:38:11 2018 +0100
+++ b/js/feed-parser.js	Mon Dec 10 23:24:36 2018 +0100
@@ -9,9 +9,11 @@
 'use strict';
 
 export const XMLNS = {
+    ATOM03: 'http://purl.org/atom/ns#',
     ATOM10: 'http://www.w3.org/2005/Atom',
     RSS09: 'http://my.netscape.com/rdf/simple/0.9/',
-    XHTML: 'http://www.w3.org/1999/xhtml'
+    XHTML: 'http://www.w3.org/1999/xhtml',
+    PARSERERROR: 'http://www.mozilla.org/newlayout/xml/parsererror.xml'
 }
 
 const ALLOWED_LINK_PROTOCOLS = new Set(['http:', 'https:', 'ftp:']);
@@ -48,8 +50,32 @@
     return url;
 }
 
+/**
+ * Decode a base64 string and interpret the decoded bytes as UTF-8 text.
+ *
+ * @param {string} base64Str - base64-encoded data
+ * @returns {string} the decoded text
+ * @throws {TypeError} if atob() rejects the input with a DOMException
+ */
+function base64Decode(base64Str) {
+    let encodedText;
+    try {
+        encodedText = atob(base64Str);
+    } catch (e) {
+        throw (e instanceof DOMException) ? new TypeError(e.message) : e;
+    }
+    // atob() returns one character per byte, copy them into a byte buffer
+    let byteBuffer = new Uint8Array(new ArrayBuffer(encodedText.length));
+    for (let i = 0; i < encodedText.length; i++) {
+        byteBuffer[i] = encodedText.charCodeAt(i);
+    }
+    return new TextDecoder().decode(byteBuffer);
+}
+
 function feedNSResolver(prefix) {
     switch (prefix) {
+        case 'atom03':
+            return XMLNS.ATOM03;
         case 'atom':
             return XMLNS.ATOM10;
         case 'rss':
@@ -177,14 +203,11 @@
     static probeFeed(feedDocument) {
         let documentElement = feedDocument.documentElement;
         if (documentElement.nodeName === 'feed' &&
+                documentElement.namespaceURI === XMLNS.ATOM03) {
+            return ['atom', '0.3'];
+        } else if (documentElement.nodeName === 'feed' &&
                 documentElement.namespaceURI === XMLNS.ATOM10) {
-            let version = documentElement.getAttribute('version');
-            if (version === null) {
-                version = '1.0';
-            }
-            if (version === '1.0') {
-                return ['atom', version];
-            }
+            return ['atom', '1.0'];
         } else if (documentElement.nodeName === 'rss') {
             let version = documentElement.getAttribute('version');
             switch (version) {
@@ -209,6 +232,244 @@
         this.document = undefined;
     }
 
+    /**
+     * Extract the content of an Atom 0.3 content construct according to its
+     * "type" (default "text/plain") and "mode" (default "xml") attributes.
+     *
+     * @param {Element} containerElement - element holding the construct
+     * @param {boolean} [textOnly=true] - return plain text rather than markup
+     * @returns {string|undefined} the content, or undefined if the
+     *     type/mode combination is unsupported or the markup cannot be parsed
+     * @throws {TypeError} if base64-encoded content cannot be decoded
+     */
+    parseAtom03ContentConstruct(containerElement, textOnly = true) {
+        let contentType = containerElement.getAttribute('type');
+        let contentMode = containerElement.getAttribute('mode');
+        if (contentType === null) {
+            contentType = 'text/plain';
+        }
+        if (contentMode === null) {
+            contentMode = 'xml';
+        }
+        if (contentType === 'application/xhtml+xml') {
+            let htmlText;
+            if (contentMode === 'xml') {
+                return textOnly ? containerElement.textContent.trim() :
+                        containerElement.innerHTML;
+            } else if (contentMode === 'escaped') {
+                htmlText = containerElement.textContent;
+            } else if (contentMode === 'base64') {
+                htmlText = base64Decode(containerElement.textContent);
+            }
+            if (typeof htmlText === 'undefined') {
+                return;
+            }
+            if (textOnly) {
+                let htmlDocument = new DOMParser().parseFromString(htmlText,
+                        'application/xhtml+xml');
+                if (htmlDocument.documentElement.namespaceURI ===
+                        XMLNS.PARSERERROR) {
+                    return;
+                }
+                return htmlDocument.body.textContent.trim();
+            }
+            return htmlText;
+        } else if (contentType === 'text/html') {
+            let htmlText;
+            if (contentMode === 'escaped') {
+                htmlText = containerElement.textContent;
+            } else if (contentMode === 'base64') {
+                htmlText = base64Decode(containerElement.textContent);
+            }
+            if (typeof htmlText === 'undefined') {
+                return;
+            }
+            if (textOnly) {
+                let htmlDocument = new DOMParser().parseFromString(htmlText,
+                        'text/html');
+                return htmlDocument.body.textContent.trim();
+            }
+            return htmlText;
+        } else if (contentType === 'text/plain') {
+            let text;
+            if (contentMode === 'xml') {
+                // inline text is the default, e.g. <title>Example</title>
+                text = containerElement.textContent;
+            } else if (contentMode === 'escaped') {
+                text = containerElement.textContent;
+            } else if (contentMode === 'base64') {
+                text = base64Decode(containerElement.textContent);
+            }
+            if (typeof text === 'undefined') {
+                return;
+            }
+            return textOnly ? text : `<pre>${encodeXML(text)}</pre>`;
+        }
+        return;
+    }
+
+    /**
+     * Extract the markup of an Atom 0.3 content element; for
+     * "multipart/alternative" content the most preferred supported
+     * alternative is used.
+     *
+     * @param {Element} contentElement - atom03:content element
+     * @returns {string|undefined} markup, or undefined if unsupported
+     * @throws {TypeError} if a multipart child is not an atom03:content
+     *     element or base64-encoded content cannot be decoded
+     */
+    parseAtom03Content(contentElement) {
+        // ordered from lowest to highest preference
+        const contentTypes = [
+            'text/plain',
+            'text/html',
+            'application/xhtml+xml'
+        ];
+        if (contentElement.getAttribute('type') === 'multipart/alternative' &&
+                contentElement.getAttribute('mode') === null) {
+            // select alternative according to above preference
+            let selectedTypeIndex = -1;
+            let selectedElement;
+            for (let innerContentElement of contentElement.children) {
+                if (innerContentElement.localName !== 'content' ||
+                        innerContentElement.namespaceURI !== XMLNS.ATOM03) {
+                    throw new TypeError('child elements of a multipart ' +
+                            'content element must be content elements');
+                }
+                let innerContentType = innerContentElement.getAttribute('type');
+                if (innerContentType === null) {
+                    innerContentType = 'text/plain';
+                }
+                let typeIndex = contentTypes.indexOf(innerContentType);
+                if (typeIndex > selectedTypeIndex) {
+                    selectedTypeIndex = typeIndex;
+                    selectedElement = innerContentElement;
+                }
+            }
+            if (selectedTypeIndex >= 0) {
+                contentElement = selectedElement;
+            }
+        }
+
+        return this.parseAtom03ContentConstruct(contentElement, false);
+    }
+
+    /**
+     * Build a FeedEntry from an Atom 0.3 entry element; the summary is used
+     * as content if there is no usable content element.
+     *
+     * @param {Element} entryElement - atom03:entry element
+     * @returns {FeedEntry} entry with title, link, date and content
+     */
+    parseAtom03Entry(entryElement) {
+        let title;
+        let link;
+        let date;
+        let content;
+        let titleElement = feedQueryXPath(this.document, entryElement,
+                './atom03:title');
+        if (titleElement !== null) {
+            title = titleElement.textContent.trim();
+        }
+
+        let linkElement = feedQueryXPath(this.document, entryElement,
+                './atom03:link[@href][@rel="alternate"]');
+        if (linkElement !== null) {
+            link = parseURL(linkElement.getAttribute('href'), this.url);
+        }
+
+        let modifiedElement = feedQueryXPath(this.document, entryElement,
+                './atom03:modified');
+        if (modifiedElement !== null) {
+            date = parseDate(modifiedElement.textContent);
+        }
+
+        let contentElement = feedQueryXPath(this.document, entryElement,
+                './atom03:content');
+        if (contentElement !== null) {
+            try {
+                content = this.parseAtom03Content(contentElement);
+            } catch (e) {
+                if (!(e instanceof TypeError)) {
+                    throw e;
+                }
+            }
+        }
+        if (typeof content === 'undefined') {
+            let summaryElement = feedQueryXPath(this.document, entryElement,
+                    './atom03:summary');
+            if (summaryElement !== null) {
+                try {
+                    content = this.parseAtom03ContentConstruct(summaryElement,
+                            false);
+                } catch (e) {
+                    if (!(e instanceof TypeError)) {
+                        throw e;
+                    }
+                }
+            }
+        }
+
+        return new FeedEntry({title, link, date, content});
+    }
+
+    /**
+     * Build a Feed from the Atom 0.3 document held in this.document.
+     *
+     * @returns {Feed} feed with title, subtitle, logo and entries
+     */
+    parseAtom03Feed() {
+        let title;
+        let subtitle;
+        let logo;
+        let entries = [];
+        let documentElement = this.document.documentElement;
+
+        let titleElement = feedQueryXPath(this.document, documentElement,
+                './atom03:title');
+        if (titleElement !== null) {
+            try {
+                title = this.parseAtom03ContentConstruct(titleElement);
+            } catch (e) {
+                if (!(e instanceof TypeError)) {
+                    throw e;
+                }
+            }
+        }
+
+        let taglineElement = feedQueryXPath(this.document, documentElement,
+                './atom03:tagline');
+        if (taglineElement !== null) {
+            try {
+                subtitle = this.parseAtom03ContentConstruct(taglineElement);
+            } catch (e) {
+                if (!(e instanceof TypeError)) {
+                    throw e;
+                }
+            }
+        }
+
+        let logoElement = feedQueryXPath(this.document, documentElement,
+                './atom03:logo');
+        if (logoElement !== null) {
+            try {
+                logo = this.parseAtomLogo(logoElement);
+            } catch (e) {
+                if (!(e instanceof TypeError)) {
+                    throw e;
+                }
+            }
+        }
+
+        let entryElements = feedQueryXPathAll(this.document, documentElement,
+                './atom03:entry');
+        for (let entryElement of entryElements) {
+            entries.push(this.parseAtom03Entry(entryElement));
+        }
+
+        return new Feed(this.url, {title, subtitle, logo, entries});
+    }
+
     parseAtomLogo(logoElement) {
         let url = parseURL(logoElement.textContent.trim(), this.url);
         if (url === null) {
@@ -549,14 +810,17 @@
         this.url = url;
         this.document = new DOMParser().parseFromString(xmlString,
                 'application/xml');
-        if (this.document.documentElement.nodeName.toLowerCase() ===
-                'parsererror') {
+        if (this.document.documentElement.namespaceURI === XMLNS.PARSERERROR) {
             throw new ParserError(this.document.documentElement.textContent);
         }
 
         let [type, version] = this.constructor.probeFeed(this.document);
         if (type === 'atom') {
-            return this.parseAtomFeed();
+            if (version === '0.3') {
+                return this.parseAtom03Feed();
+            } else if (version === '1.0') {
+                return this.parseAtomFeed();
+            }
         } else if (type === 'rss') {
             if (version === '0.9') {
                 return this.parseRSS1Feed();