comparison js/feed-parser.js @ 19:3fcd2209b39a

Add support for RSS 1.0 feeds
author Guido Berhoerster <guido+feed-preview@berhoerster.name>
date Thu, 13 Dec 2018 09:02:40 +0100
parents 15db49e77deb
children da483ce3832d
comparison
equal deleted inserted replaced
18:15db49e77deb 19:3fcd2209b39a
10 10
11 export const XMLNS = { 11 export const XMLNS = {
12 ATOM03: 'http://purl.org/atom/ns#', 12 ATOM03: 'http://purl.org/atom/ns#',
13 ATOM10: 'http://www.w3.org/2005/Atom', 13 ATOM10: 'http://www.w3.org/2005/Atom',
14 RSS09: 'http://my.netscape.com/rdf/simple/0.9/', 14 RSS09: 'http://my.netscape.com/rdf/simple/0.9/',
15 RSS10: 'http://purl.org/rss/1.0/',
15 XHTML: 'http://www.w3.org/1999/xhtml', 16 XHTML: 'http://www.w3.org/1999/xhtml',
16 PARSERERROR: 'http://www.mozilla.org/newlayout/xml/parsererror.xml' 17 PARSERERROR: 'http://www.mozilla.org/newlayout/xml/parsererror.xml'
17 } 18 }
18 const ALLOWED_LINK_PROTOCOLS = new Set(['http:', 'https:', 'ftp:']); 19 const ALLOWED_LINK_PROTOCOLS = new Set(['http:', 'https:', 'ftp:']);
19 20
68 switch (prefix) { 69 switch (prefix) {
69 case 'atom03': 70 case 'atom03':
70 return XMLNS.ATOM03; 71 return XMLNS.ATOM03;
71 case 'atom': 72 case 'atom':
72 return XMLNS.ATOM10; 73 return XMLNS.ATOM10;
73 case 'rss': 74 case 'rss09':
74 return XMLNS.RSS09; 75 return XMLNS.RSS09;
76 case 'rss10':
77 return XMLNS.RSS10;
75 } 78 }
76 return null; 79 return null;
77 } 80 }
78 81
79 function feedQueryXPath(feedDocument, scopeElement, xpathQuery) { 82 function feedQueryXPath(feedDocument, scopeElement, xpathQuery) {
212 return ['rss', version]; 215 return ['rss', version];
213 } 216 }
214 } else if (documentElement.localName.toLowerCase() === 'rdf' && 217 } else if (documentElement.localName.toLowerCase() === 'rdf' &&
215 documentElement.getAttribute('xmlns') === XMLNS.RSS09) { 218 documentElement.getAttribute('xmlns') === XMLNS.RSS09) {
216 return ['rss', '0.9']; 219 return ['rss', '0.9'];
220 } else if (documentElement.localName.toLowerCase() === 'rdf' &&
221 documentElement.getAttribute('xmlns') === XMLNS.RSS10) {
222 return ['rss', '1.0'];
217 } 223 }
218 224
219 return [undefined, undefined]; 225 return [undefined, undefined];
220 } 226 }
221 227
586 } 592 }
587 593
588 return new Feed(this.url, {title, subtitle, logo, entries}); 594 return new Feed(this.url, {title, subtitle, logo, entries});
589 } 595 }
590 596
591 parseRSS1Logo(imageElement) { 597 parseRSS1Logo(imageElement, nsPrefix) {
592 let title; 598 let title;
593 let urlElement = feedQueryXPath(this.document, imageElement, 599 let urlElement = feedQueryXPath(this.document, imageElement,
594 './rss:url'); 600 `./${nsPrefix}:url`);
595 if (urlElement === null) { 601 if (urlElement === null) {
596 throw new TypeError('missing <url> element in <logo> element'); 602 throw new TypeError('missing <url> element in <logo> element');
597 } 603 }
598 let url = parseURL(urlElement.textContent.trim(), this.url); 604 let url = parseURL(urlElement.textContent.trim(), this.url);
599 if (url === null) { 605 if (url === null) {
600 throw new TypeError('invalid URL in <logo> element'); 606 throw new TypeError('invalid URL in <logo> element');
601 } 607 }
602 608
603 let titleElement = feedQueryXPath(this.document, imageElement, 609 let titleElement = feedQueryXPath(this.document, imageElement,
604 './rss:title'); 610 `./${nsPrefix}:title`);
605 if (titleElement !== null) { 611 if (titleElement !== null) {
606 title = titleElement.textContent.trim(); 612 title = titleElement.textContent.trim();
607 } 613 }
608 614
609 return new FeedLogo(url, {title}); 615 return new FeedLogo(url, {title});
610 } 616 }
611 617
612 parseRSS1Entry(itemElement) { 618 parseRSS1Entry(itemElement, nsPrefix) {
613 let title; 619 let title;
614 let link; 620 let link;
615 let titleElement = feedQueryXPath(this.document, itemElement, 621 let titleElement = feedQueryXPath(this.document, itemElement,
616 './rss:title'); 622 `./${nsPrefix}:title`);
617 if (titleElement !== null) { 623 if (titleElement !== null) {
618 title = titleElement.textContent; 624 title = titleElement.textContent;
619 } 625 }
620 626
621 let linkElement = feedQueryXPath(this.document, itemElement, 627 let linkElement = feedQueryXPath(this.document, itemElement,
622 './rss:link'); 628 `./${nsPrefix}:link`);
623 if (linkElement !== null) { 629 if (linkElement !== null) {
624 link = parseURL(linkElement.textContent, this.url); 630 link = parseURL(linkElement.textContent, this.url);
625 } 631 }
626 632
627 return new FeedEntry({title, link}); 633 return new FeedEntry({title, link});
628 } 634 }
629 635
630 parseRSS1Feed() { 636 parseRSS1Feed(version) {
637 let nsPrefix = version === '0.9' ? 'rss09' : 'rss10';
631 let title; 638 let title;
632 let subtitle; 639 let subtitle;
633 let logo; 640 let logo;
634 let entries = []; 641 let entries = [];
635 let documentElement = this.document.documentElement; 642 let documentElement = this.document.documentElement;
636 let titleElement = feedQueryXPath(this.document, documentElement, 643 let titleElement = feedQueryXPath(this.document, documentElement,
637 './rss:channel/rss:title'); 644 `./${nsPrefix}:channel/${nsPrefix}:title`);
638 if (titleElement !== null) { 645 if (titleElement !== null) {
639 title = titleElement.textContent; 646 title = titleElement.textContent;
640 } 647 }
641 648
642 let descriptionElement = feedQueryXPath(this.document, documentElement, 649 let descriptionElement = feedQueryXPath(this.document, documentElement,
643 './rss:channel/rss:description'); 650 `./${nsPrefix}:channel/${nsPrefix}:description`);
644 if (descriptionElement !== null) { 651 if (descriptionElement !== null) {
645 subtitle = descriptionElement.textContent; 652 subtitle = descriptionElement.textContent;
646 } 653 }
647 654
648 let imageElement = feedQueryXPath(this.document, documentElement, 655 let imageElement = feedQueryXPath(this.document, documentElement,
649 './rss:image'); 656 `./${nsPrefix}:image`);
650 if (imageElement !== null) { 657 if (imageElement !== null) {
651 try { 658 try {
652 logo = this.parseRSS1Logo(imageElement); 659 logo = this.parseRSS1Logo(imageElement, nsPrefix);
653 } catch (e) { 660 } catch (e) {
654 if (!(e instanceof TypeError)) { 661 if (!(e instanceof TypeError)) {
655 throw e; 662 throw e;
656 } 663 }
657 } 664 }
658 } 665 }
659 666
660 let itemElements = feedQueryXPathAll(this.document, documentElement, 667 let itemElements = feedQueryXPathAll(this.document, documentElement,
661 './rss:item'); 668 `./${nsPrefix}:item`);
662 for (let itemElement of itemElements) { 669 for (let itemElement of itemElements) {
663 let entry = this.parseRSS1Entry(itemElement); 670 let entry = this.parseRSS1Entry(itemElement, nsPrefix);
664 if (typeof entry !== 'undefined') { 671 if (typeof entry !== 'undefined') {
665 entries.push(entry); 672 entries.push(entry);
666 } 673 }
667 } 674 }
668 675
811 return this.parseAtom03Feed(); 818 return this.parseAtom03Feed();
812 } else if (version === '1.0') { 819 } else if (version === '1.0') {
813 return this.parseAtomFeed(); 820 return this.parseAtomFeed();
814 } 821 }
815 } else if (type === 'rss') { 822 } else if (type === 'rss') {
816 if (version === '0.9') { 823 if (version === '0.9' || version === '1.0') {
817 return this.parseRSS1Feed(); 824 return this.parseRSS1Feed(version);
818 } else { 825 } else {
819 return this.parseRSS2Feed(); 826 return this.parseRSS2Feed();
820 } 827 }
821 } 828 }
822 throw new UnsupportedFeedTypeError(); 829 throw new UnsupportedFeedTypeError();