addons/firefox-addons/feed-preview

changeset 15:150f07c7595f

Add support for Atom 0.3 feeds
author Guido Berhoerster <guido+feed-preview@berhoerster.name>
date Mon Dec 10 23:24:36 2018 +0100 (18 months ago)
parents 376a0e415bba
children a59d322e5826
files js/feed-parser.js
line diff
     1.1 --- a/js/feed-parser.js	Mon Dec 10 16:38:11 2018 +0100
     1.2 +++ b/js/feed-parser.js	Mon Dec 10 23:24:36 2018 +0100
     1.3 @@ -9,9 +9,11 @@
     1.4  'use strict';
     1.5  
     1.6  export const XMLNS = {
     1.7 +    ATOM03: 'http://purl.org/atom/ns#',
     1.8      ATOM10: 'http://www.w3.org/2005/Atom',
     1.9      RSS09: 'http://my.netscape.com/rdf/simple/0.9/',
    1.10 -    XHTML: 'http://www.w3.org/1999/xhtml'
    1.11 +    XHTML: 'http://www.w3.org/1999/xhtml',
    1.12 +    PARSERERROR: 'http://www.mozilla.org/newlayout/xml/parsererror.xml'
    1.13  }
    1.14  const ALLOWED_LINK_PROTOCOLS = new Set(['http:', 'https:', 'ftp:']);
    1.15  
    1.16 @@ -48,8 +50,24 @@
    1.17      return url;
    1.18  }
    1.19  
    1.20 +function base64Decode(base64Str) { // decode base64 input and return the decoded bytes as text
    1.21 +    let encodedText;
    1.22 +    try {
    1.23 +        encodedText = atob(base64Str); // atob yields a string holding one char code per decoded byte
    1.24 +    } catch (e) {
    1.25 +        throw (e instanceof DOMException) ? new TypeError(e.message) : e; // a DOMException is rethrown as TypeError, anything else unchanged
    1.26 +    }
    1.27 +    let byteBuffer = new Uint8Array(new ArrayBuffer(encodedText.length));
    1.28 +    for (let i = 0; i < encodedText.length; i++) {
    1.29 +        byteBuffer[i] = encodedText.charCodeAt(i); // copy each char code into the byte array
    1.30 +    }
    1.31 +    return new TextDecoder().decode(byteBuffer); // TextDecoder without a label decodes as UTF-8
    1.32 +}
    1.33 +
    1.34  function feedNSResolver(prefix) {
    1.35      switch (prefix) {
    1.36 +        case 'atom03':
    1.37 +            return XMLNS.ATOM03;
    1.38          case 'atom':
    1.39              return XMLNS.ATOM10;
    1.40          case 'rss':
    1.41 @@ -177,14 +195,11 @@
    1.42      static probeFeed(feedDocument) {
    1.43          let documentElement = feedDocument.documentElement;
    1.44          if (documentElement.nodeName === 'feed' &&
    1.45 +                documentElement.namespaceURI === XMLNS.ATOM03) {
    1.46 +            return ['atom', '0.3'];
    1.47 +        } else if (documentElement.nodeName === 'feed' &&
    1.48                  documentElement.namespaceURI === XMLNS.ATOM10) {
    1.49 -            let version = documentElement.getAttribute('version');
    1.50 -            if (version === null) {
    1.51 -                version = '1.0';
    1.52 -            }
    1.53 -            if (version === '1.0') {
    1.54 -                return ['atom', version];
    1.55 -            }
    1.56 +            return ['atom', '1.0'];
    1.57          } else if (documentElement.nodeName === 'rss') {
    1.58              let version = documentElement.getAttribute('version');
    1.59              switch (version) {
    1.60 @@ -209,6 +224,209 @@
    1.61          this.document = undefined;
    1.62      }
    1.63  
    1.64 +    parseAtom03ContentConstruct(containerElement, textOnly = true) {
    1.65 +        let contentType = containerElement.getAttribute('type');
    1.66 +        let contentMode = containerElement.getAttribute('mode');
    1.67 +        if (contentType === null) {
    1.68 +            contentType = 'text/plain';
    1.69 +        }
    1.70 +        if (contentMode === null) {
    1.71 +            contentMode = 'xml';
    1.72 +        }
    1.73 +        if (contentType === 'application/xhtml+xml') {
    1.74 +            let htmlText;
    1.75 +            if (contentMode === 'xml') {
    1.76 +                return textOnly ? containerElement.textContent.trim() :
    1.77 +                        containerElement.innerHTML;
    1.78 +            } else if (contentMode === 'escaped') {
    1.79 +                htmlText = containerElement.textContent;
    1.80 +            } else if (contentMode === 'base64') {
    1.81 +                htmlText = base64Decode(containerElement.textContent);
    1.82 +            }
    1.83 +            if (typeof htmlText === 'undefined') {
    1.84 +                return;
    1.85 +            }
    1.86 +            if (textOnly) {
    1.87 +                let htmlDocument = new DOMParser().parseFromString(htmlText,
    1.88 +                        'application/xhtml+xml');
    1.89 +                if (htmlDocument.documentElement.namespaceURI ===
    1.90 +                        XMLNS.PARSERERROR) {
    1.91 +                    return;
    1.92 +                }
    1.93 +                return htmlDocument.body.textContent.trim();
    1.94 +            }
    1.95 +            return htmlText;
    1.96 +        } else if (contentType === 'text/html') {
    1.97 +            let htmlText;
    1.98 +            if (contentMode === 'escaped') {
    1.99 +                htmlText = containerElement.textContent;
   1.100 +            } else if (contentMode === 'base64') {
   1.101 +                htmlText = base64Decode(containerElement.textContent);
   1.102 +            }
   1.103 +            if (typeof htmlText === 'undefined') {
   1.104 +                return;
   1.105 +            }
   1.106 +            if (textOnly) {
   1.107 +                let htmlDocument = new DOMParser().parseFromString(htmlText,
   1.108 +                        'text/html');
   1.109 +                return htmlDocument.body.textContent.trim();
   1.110 +            }
   1.111 +            return htmlText;
   1.112 +        } else if (contentType === 'text/plain') {
   1.113 +            let text;
   1.114 +            if (contentMode === 'escaped') {
   1.115 +                text = containerElement.textContent;
   1.116 +            } else if (contentMode === 'base64') {
   1.117 +                text = base64Decode(containerElement.textContent);
   1.118 +            }
   1.119 +            if (typeof text === 'undefined') {
   1.120 +                return;
   1.121 +            }
   1.122 +            return textOnly ? text : `<pre>${encodeXML(text)}</pre>`;
   1.123 +        }
   1.124 +        return;
   1.125 +    }
   1.126 +
   1.127 +    parseAtom03Content(contentElement) {
   1.128 +        // ordered from lowest to highest preference
   1.129 +        const contentTypes = [
   1.130 +            'text/plain',
   1.131 +            'text/html',
   1.132 +            'application/xhtml+xml'
   1.133 +        ];
   1.134 +        if (contentElement.getAttribute('type') === 'multipart/alternative' &&
   1.135 +                contentElement.getAttribute('mode') === null) {
   1.136 +            // select alternative according to above preference
   1.137 +            let selectedTypeIndex = -1;
   1.138 +            let selectedElement;
   1.139 +            for (let innerContentElement of contentElement.children) {
   1.140 +                if (innerContentElement.localName !== 'content' ||
   1.141 +                        innerContentElement.namespaceURI !== XMLNS.ATOM03) {
   1.142 +                    throw new TypeError('child elements of a multipart ' +
   1.143 +                            ' content elements must be content elements');
   1.144 +                }
   1.145 +                let innerContentType = innerContentElement.getAttribute('type');
   1.146 +                if (innerContentType === null) {
   1.147 +                    innerContentType = 'text/plain';
   1.148 +                }
   1.149 +                let typeIndex = contentTypes.indexOf(innerContentType);
   1.150 +                if (typeIndex > selectedTypeIndex) {
   1.151 +                    selectedTypeIndex = typeIndex;
   1.152 +                    selectedElement = innerContentElement;
   1.153 +                }
   1.154 +            }
   1.155 +            if (selectedTypeIndex >= 0) {
   1.156 +                contentElement = selectedElement;
   1.157 +            }
   1.158 +        }
   1.159 +
   1.160 +        return this.parseAtom03ContentConstruct(contentElement, false);
   1.161 +    }
   1.162 +
   1.163 +    parseAtom03Entry(entryElement) { // build a FeedEntry from one atom03:entry element; absent fields stay undefined
   1.164 +        let title;
   1.165 +        let link;
   1.166 +        let date;
   1.167 +        let content;
   1.168 +        let titleElement = feedQueryXPath(this.document, entryElement,
   1.169 +                './atom03:title');
   1.170 +        if (titleElement !== null) {
   1.171 +            title = titleElement.textContent.trim(); // raw text content; type/mode attributes are not interpreted here
   1.172 +        }
   1.173 +
   1.174 +        let linkElement = feedQueryXPath(this.document, entryElement,
   1.175 +                './atom03:link[@href][@rel="alternate"]');
   1.176 +        if (linkElement !== null) {
   1.177 +            link = parseURL(linkElement.getAttribute('href'), this.url); // this.url is passed as the second argument, presumably the base URL
   1.178 +        }
   1.179 +
   1.180 +        let modifiedElement = feedQueryXPath(this.document, entryElement,
   1.181 +                './atom03:modified');
   1.182 +        if (modifiedElement !== null) {
   1.183 +            date = parseDate(modifiedElement.textContent);
   1.184 +        }
   1.185 +
   1.186 +        let contentElement = feedQueryXPath(this.document, entryElement,
   1.187 +                './atom03:content');
   1.188 +        if (contentElement !== null) {
   1.189 +            try {
   1.190 +                content = this.parseAtom03Content(contentElement);
   1.191 +            } catch (e) {
   1.192 +                if (!(e instanceof TypeError)) { // a TypeError leaves content undefined; other errors propagate
   1.193 +                    throw e;
   1.194 +                }
   1.195 +            }
   1.196 +        }
   1.197 +        if (typeof content === 'undefined') { // no usable content element: fall back to the summary
   1.198 +            let summaryElement = feedQueryXPath(this.document, entryElement,
   1.199 +                    './atom03:summary');
   1.200 +            if (summaryElement !== null) {
   1.201 +                try {
   1.202 +                    content = this.parseAtom03ContentConstruct(summaryElement,
   1.203 +                            false);
   1.204 +                } catch (e) {
   1.205 +                    if (!(e instanceof TypeError)) { // a TypeError leaves content undefined; other errors propagate
   1.206 +                        throw e;
   1.207 +                    }
   1.208 +                }
   1.209 +            }
   1.210 +        }
   1.211 +
   1.212 +        return new FeedEntry({title, link, date, content});
   1.213 +    }
   1.214 +
   1.215 +    parseAtom03Feed() {
   1.216 +        let title;
   1.217 +        let subtitle;
   1.218 +        let logo;
   1.219 +        let entries = [];
   1.220 +        let documentElement = this.document.documentElement;
   1.221 +
   1.222 +        let titleElement = feedQueryXPath(this.document, documentElement,
   1.223 +                './atom03:title');
   1.224 +        if (titleElement !== null) {
   1.225 +            try {
   1.226 +                title = this.parseAtom03ContentConstruct(titleElement);
   1.227 +            } catch (e) {
   1.228 +                if (!(e instanceof TypeError)) {
   1.229 +                    throw e;
   1.230 +                }
   1.231 +            }
   1.232 +        }
   1.233 +
   1.234 +        let taglineElement = feedQueryXPath(this.document, documentElement,
   1.235 +                './atom03:tagline');
   1.236 +        if (taglineElement !== null) {
   1.237 +            try {
   1.238 +                title = this.parseAtom03ContentConstruct(taglineElement);
   1.239 +            } catch (e) {
   1.240 +                if (!(e instanceof TypeError)) {
   1.241 +                    throw e;
   1.242 +                }
   1.243 +            }
   1.244 +        }
   1.245 +
   1.246 +        let logoElement = feedQueryXPath(this.document, documentElement,
   1.247 +                './atom03:logo');
   1.248 +        if (logoElement !== null) {
   1.249 +            try {
   1.250 +                logo = this.parseAtomLogo(logoElement);
   1.251 +            } catch (e) {
   1.252 +                if (!(e instanceof TypeError)) {
   1.253 +                    throw e;
   1.254 +                }
   1.255 +            }
   1.256 +        }
   1.257 +
   1.258 +        let entryElements = feedQueryXPathAll(this.document, documentElement,
   1.259 +                './atom03:entry');
   1.260 +        for (let entryElement of entryElements) {
   1.261 +            entries.push(this.parseAtom03Entry(entryElement));
   1.262 +        }
   1.263 +
   1.264 +        return new Feed(this.url, {title, subtitle, logo, entries});
   1.265 +    }
   1.266 +
   1.267      parseAtomLogo(logoElement) {
   1.268          let url = parseURL(logoElement.textContent.trim(), this.url);
   1.269          if (url === null) {
   1.270 @@ -549,14 +767,17 @@
   1.271          this.url = url;
   1.272          this.document = new DOMParser().parseFromString(xmlString,
   1.273                  'application/xml');
   1.274 -        if (this.document.documentElement.nodeName.toLowerCase() ===
   1.275 -                'parsererror') {
   1.276 +        if (this.document.documentElement.namespaceURI === XMLNS.PARSERERROR) {
   1.277              throw new ParserError(this.document.documentElement.textContent);
   1.278          }
   1.279  
   1.280          let [type, version] = this.constructor.probeFeed(this.document);
   1.281          if (type === 'atom') {
   1.282 -            return this.parseAtomFeed();
   1.283 +            if (version === '0.3') {
   1.284 +                return this.parseAtom03Feed();
   1.285 +            } else if (version === '1.0') {
   1.286 +                return this.parseAtomFeed();
   1.287 +            }
   1.288          } else if (type === 'rss') {
   1.289              if (version === '0.9') {
   1.290                  return this.parseRSS1Feed();