diff js/feed-parser.js @ 15:150f07c7595f

Add support for Atom 0.3 feeds
author Guido Berhoerster <guido+feed-preview@berhoerster.name>
date Mon, 10 Dec 2018 23:24:36 +0100
parents 376a0e415bba
children 48cabd01ef64
line wrap: on
line diff
--- a/js/feed-parser.js	Mon Dec 10 16:38:11 2018 +0100
+++ b/js/feed-parser.js	Mon Dec 10 23:24:36 2018 +0100
@@ -9,9 +9,11 @@
 'use strict';
 
 export const XMLNS = {
+    ATOM03: 'http://purl.org/atom/ns#',
     ATOM10: 'http://www.w3.org/2005/Atom',
     RSS09: 'http://my.netscape.com/rdf/simple/0.9/',
-    XHTML: 'http://www.w3.org/1999/xhtml'
+    XHTML: 'http://www.w3.org/1999/xhtml',
+    PARSERERROR: 'http://www.mozilla.org/newlayout/xml/parsererror.xml'
 }
 const ALLOWED_LINK_PROTOCOLS = new Set(['http:', 'https:', 'ftp:']);
 
@@ -48,8 +50,24 @@
     return url;
 }
 
+// Decode base64 data holding UTF-8 text into a string; a DOMException
+// raised by atob() is rethrown as a TypeError
+function base64Decode(base64Str) {
+    let binaryStr;
+    try {
+        binaryStr = atob(base64Str);
+    } catch (err) {
+        throw (err instanceof DOMException) ? new TypeError(err.message) : err;
+    }
+    // each char code of the binary string returned by atob() is one byte
+    let bytes = Uint8Array.from(binaryStr, (ch) => ch.charCodeAt(0));
+    return new TextDecoder().decode(bytes);
+}
+
 function feedNSResolver(prefix) {
     switch (prefix) {
+        case 'atom03':
+            return XMLNS.ATOM03;
         case 'atom':
             return XMLNS.ATOM10;
         case 'rss':
@@ -177,14 +195,11 @@
     static probeFeed(feedDocument) {
         let documentElement = feedDocument.documentElement;
         if (documentElement.nodeName === 'feed' &&
+                documentElement.namespaceURI === XMLNS.ATOM03) {
+            return ['atom', '0.3'];
+        } else if (documentElement.nodeName === 'feed' &&
                 documentElement.namespaceURI === XMLNS.ATOM10) {
-            let version = documentElement.getAttribute('version');
-            if (version === null) {
-                version = '1.0';
-            }
-            if (version === '1.0') {
-                return ['atom', version];
-            }
+            return ['atom', '1.0'];
         } else if (documentElement.nodeName === 'rss') {
             let version = documentElement.getAttribute('version');
             switch (version) {
@@ -209,6 +224,209 @@
         this.document = undefined;
     }
 
+    // Parse an Atom 0.3 content construct, e.g. a title, tagline, summary or
+    // content element, honoring its type (default text/plain) and mode
+    // (default xml) attributes; returns plain text if textOnly is true and
+    // markup otherwise (plain text is passed through encodeXML() and wrapped
+    // in a pre element), or undefined for unsupported type/mode combinations;
+    // base64Decode() may throw a TypeError for undecodable base64 content
+    parseAtom03ContentConstruct(containerElement, textOnly = true) {
+        let contentType = containerElement.getAttribute('type');
+        if (contentType === null) {
+            contentType = 'text/plain';
+        }
+        let contentMode = containerElement.getAttribute('mode');
+        if (contentMode === null) {
+            contentMode = 'xml';
+        }
+
+        // only plain text, HTML and XHTML content is supported
+        const isXHTML = (contentType === 'application/xhtml+xml');
+        const isPlainText = (contentType === 'text/plain');
+        if (!isXHTML && !isPlainText && contentType !== 'text/html') {
+            return;
+        }
+
+        if (isXHTML && contentMode === 'xml') {
+            // inline XHTML is already part of the feed document's tree
+            return textOnly ? containerElement.textContent.trim() :
+                    containerElement.innerHTML;
+        }
+
+        let rawText;
+        if (contentMode === 'escaped' ||
+                (isPlainText && contentMode === 'xml')) {
+            // plain text in the default xml mode, e.g. <title>Foo</title>,
+            // is just character data and must not be discarded
+            rawText = containerElement.textContent;
+        } else if (contentMode === 'base64') {
+            rawText = base64Decode(containerElement.textContent);
+        }
+        if (typeof rawText === 'undefined') {
+            return;
+        }
+
+        if (isPlainText) {
+            return textOnly ? rawText : `<pre>${encodeXML(rawText)}</pre>`;
+        }
+        if (!textOnly) {
+            return rawText;
+        }
+
+        // strip the markup by parsing it and extracting the text
+        let htmlDocument = new DOMParser().parseFromString(rawText,
+                contentType);
+        if (isXHTML && htmlDocument.documentElement.namespaceURI ===
+                XMLNS.PARSERERROR) {
+            return;
+        }
+
+        // an XHTML fragment without html and body elements has no
+        // document.body, fall back to its root element
+        let rootElement = htmlDocument.body || htmlDocument.documentElement;
+        return rootElement.textContent.trim();
+    }
+
+    // Parse the content element of an entry into markup; if it is a
+    // multipart/alternative container without a mode attribute, the most
+    // preferred supported alternative among its children is parsed instead
+    parseAtom03Content(contentElement) {
+        // supported types, the index doubles as the preference rank
+        const rankedTypes = [
+            'text/plain',
+            'text/html',
+            'application/xhtml+xml'
+        ];
+        if (contentElement.getAttribute('type') !== 'multipart/alternative' ||
+                contentElement.getAttribute('mode') !== null) {
+            return this.parseAtom03ContentConstruct(contentElement, false);
+        }
+
+        // falls back to the container if no alternative is supported
+        let bestElement = contentElement;
+        let bestRank = -1;
+        for (const alternative of contentElement.children) {
+            if (!(alternative.localName === 'content' &&
+                    alternative.namespaceURI === XMLNS.ATOM03)) {
+                throw new TypeError('child elements of a multipart ' +
+                        ' content elements must be content elements');
+            }
+            const type = alternative.getAttribute('type');
+            const rank = rankedTypes.indexOf((type === null) ?
+                    'text/plain' : type);
+            // strictly greater: the first of equally ranked elements wins
+            if (rank > bestRank) {
+                bestRank = rank;
+                bestElement = alternative;
+            }
+        }
+        return this.parseAtom03ContentConstruct(bestElement, false);
+    }
+
+    // Build a FeedEntry from an Atom 0.3 entry element using its title, its
+    // alternate link, its modified date and its content; the summary
+    // element serves as content if the content element is missing or yields
+    // nothing
+    parseAtom03Entry(entryElement) {
+        // evaluate an XPath expression relative to the entry element
+        const query = (xpath) => feedQueryXPath(this.document, entryElement,
+                xpath);
+
+        // run a parser, a TypeError merely leaves the value undefined
+        const tolerant = (parse) => {
+            try {
+                return parse();
+            } catch (err) {
+                if (!(err instanceof TypeError)) {
+                    throw err;
+                }
+            }
+        };
+
+        // the title is its trimmed text content, type and mode are ignored
+        const titleElement = query('./atom03:title');
+        const title = (titleElement === null) ? undefined :
+                titleElement.textContent.trim();
+
+        // only a link with rel="alternate" and a href attribute is considered
+        const linkElement = query('./atom03:link[@href][@rel="alternate"]');
+        const link = (linkElement === null) ? undefined :
+                parseURL(linkElement.getAttribute('href'), this.url);
+
+        const modifiedElement = query('./atom03:modified');
+        const date = (modifiedElement === null) ? undefined :
+                parseDate(modifiedElement.textContent);
+
+        // content that cannot be parsed is treated like missing content
+        let content;
+        const contentElement = query('./atom03:content');
+        if (contentElement !== null) {
+            content = tolerant(() => this.parseAtom03Content(contentElement));
+        }
+        if (typeof content === 'undefined') {
+            // no usable content, try the summary
+            const summaryElement = query('./atom03:summary');
+            if (summaryElement !== null) {
+                content = tolerant(() => this.parseAtom03ContentConstruct(
+                        summaryElement, false));
+            }
+        }
+
+        return new FeedEntry({title, link, date, content});
+    }
+
+    // Build a Feed for this.url from the Atom 0.3 document in this.document;
+    // title, subtitle (taken from the tagline element) and logo remain
+    // undefined if the respective element is missing or parsing it fails
+    // with a TypeError
+    parseAtom03Feed() {
+        let title;
+        let subtitle;
+        let logo;
+        let entries = [];
+        let documentElement = this.document.documentElement;
+
+        // run a parser, a TypeError merely leaves the value undefined
+        const tolerant = (parse) => {
+            try {
+                return parse();
+            } catch (e) {
+                if (!(e instanceof TypeError)) {
+                    throw e;
+                }
+            }
+        };
+
+        let titleElement = feedQueryXPath(this.document, documentElement,
+                './atom03:title');
+        if (titleElement !== null) {
+            title = tolerant(() =>
+                    this.parseAtom03ContentConstruct(titleElement));
+        }
+
+        let taglineElement = feedQueryXPath(this.document, documentElement,
+                './atom03:tagline');
+        if (taglineElement !== null) {
+            // the tagline is the subtitle, it must not overwrite the title
+            subtitle = tolerant(() =>
+                    this.parseAtom03ContentConstruct(taglineElement));
+        }
+
+        let logoElement = feedQueryXPath(this.document, documentElement,
+                './atom03:logo');
+        if (logoElement !== null) {
+            logo = tolerant(() => this.parseAtomLogo(logoElement));
+        }
+
+        let entryElements = feedQueryXPathAll(this.document, documentElement,
+                './atom03:entry');
+        for (let entryElement of entryElements) {
+            entries.push(this.parseAtom03Entry(entryElement));
+        }
+
+        return new Feed(this.url, {title, subtitle, logo, entries});
+    }
+
     parseAtomLogo(logoElement) {
         let url = parseURL(logoElement.textContent.trim(), this.url);
         if (url === null) {
@@ -549,14 +767,17 @@
         this.url = url;
         this.document = new DOMParser().parseFromString(xmlString,
                 'application/xml');
-        if (this.document.documentElement.nodeName.toLowerCase() ===
-                'parsererror') {
+        if (this.document.documentElement.namespaceURI === XMLNS.PARSERERROR) {
             throw new ParserError(this.document.documentElement.textContent);
         }
 
         let [type, version] = this.constructor.probeFeed(this.document);
         if (type === 'atom') {
-            return this.parseAtomFeed();
+            if (version === '0.3') {
+                return this.parseAtom03Feed();
+            } else if (version === '1.0') {
+                return this.parseAtomFeed();
+            }
         } else if (type === 'rss') {
             if (version === '0.9') {
                 return this.parseRSS1Feed();