comparison js/feed-parser.js @ 48:d5506fdb65f0

Add support for Dublin Core elements to the RSS 2.0 feed parser
author Guido Berhoerster <guido+feed-preview@berhoerster.name>
date Fri, 05 Jul 2019 13:29:29 +0200
parents b68880838990
children
comparison
equal deleted inserted replaced
47:b68880838990 48:d5506fdb65f0
12 ATOM03: 'http://purl.org/atom/ns#', 12 ATOM03: 'http://purl.org/atom/ns#',
13 ATOM10: 'http://www.w3.org/2005/Atom', 13 ATOM10: 'http://www.w3.org/2005/Atom',
14 RSS09: 'http://my.netscape.com/rdf/simple/0.9/', 14 RSS09: 'http://my.netscape.com/rdf/simple/0.9/',
15 RSS10: 'http://purl.org/rss/1.0/', 15 RSS10: 'http://purl.org/rss/1.0/',
16 CONTENT: 'http://purl.org/rss/1.0/modules/content/', 16 CONTENT: 'http://purl.org/rss/1.0/modules/content/',
17 DC: 'http://purl.org/dc/elements/1.1/',
17 XHTML: 'http://www.w3.org/1999/xhtml', 18 XHTML: 'http://www.w3.org/1999/xhtml',
18 PARSERERROR: 'http://www.mozilla.org/newlayout/xml/parsererror.xml' 19 PARSERERROR: 'http://www.mozilla.org/newlayout/xml/parsererror.xml'
19 } 20 }
20 const ALLOWED_LINK_PROTOCOLS = new Set(['http:', 'https:', 'ftp:']); 21 const ALLOWED_LINK_PROTOCOLS = new Set(['http:', 'https:', 'ftp:']);
21 22
76 return XMLNS.RSS09; 77 return XMLNS.RSS09;
77 case 'rss10': 78 case 'rss10':
78 return XMLNS.RSS10; 79 return XMLNS.RSS10;
79 case 'content': 80 case 'content':
80 return XMLNS.CONTENT; 81 return XMLNS.CONTENT;
82 case 'dc':
83 return XMLNS.DC;
81 } 84 }
82 return null; 85 return null;
83 } 86 }
84 87
85 function feedQueryXPath(feedDocument, scopeElement, xpathQuery) { 88 function feedQueryXPath(feedDocument, scopeElement, xpathQuery) {
733 let title; 736 let title;
734 let link; 737 let link;
735 let date; 738 let date;
736 let content; 739 let content;
737 let files = []; 740 let files = [];
738 let titleElement = feedQueryXPath(this.document, itemElement, 741 let titleElement;
739 './title'); 742 let dateElement;
743 let contentElement;
744
745 titleElement = feedQueryXPath(this.document, itemElement,
746 './dc:title');
747 if (titleElement === null) {
748 titleElement = feedQueryXPath(this.document, itemElement,
749 './title');
750 }
740 if (titleElement !== null) { 751 if (titleElement !== null) {
741 title = titleElement.textContent; 752 title = titleElement.textContent;
742 } 753 }
743 754
744 let linkElement = feedQueryXPath(this.document, itemElement, './link'); 755 let linkElement = feedQueryXPath(this.document, itemElement, './link');
745 if (linkElement !== null) { 756 if (linkElement !== null) {
746 link = parseURL(linkElement.textContent, this.url); 757 link = parseURL(linkElement.textContent, this.url);
747 } 758 }
748 759
749 let pubDateElement = feedQueryXPath(this.document, itemElement, 760 dateElement = feedQueryXPath(this.document, itemElement, './dc:date');
750 './pubDate'); 761 if (dateElement === null) {
751 if (pubDateElement !== null) { 762 dateElement = feedQueryXPath(this.document, itemElement,
752 date = parseDate(pubDateElement.textContent); 763 './pubDate');
753 } 764 }
754 765 if (dateElement !== null) {
755 let encodedElement = feedQueryXPath(this.document, itemElement, 766 date = parseDate(dateElement.textContent);
767 }
768
769 contentElement = feedQueryXPath(this.document, itemElement,
756 './content:encoded'); 770 './content:encoded');
757 if (encodedElement !== null) { 771 if (contentElement === null) {
758 content = encodedElement.textContent.trim(); 772 contentElement = feedQueryXPath(this.document, itemElement,
759 } else { 773 './dc:description');
760 let descriptionElement = feedQueryXPath(this.document, itemElement, 774 }
775 if (contentElement === null) {
776 contentElement = feedQueryXPath(this.document, itemElement,
761 './description'); 777 './description');
762 if (descriptionElement !== null) { 778 }
763 content = descriptionElement.textContent.trim(); 779 if (contentElement !== null) {
764 } 780 content = contentElement.textContent.trim();
765 } 781 }
766 782
767 for (let enclosureElement of 783 for (let enclosureElement of
768 feedQueryXPathAll(this.document, itemElement, './enclosure')) { 784 feedQueryXPathAll(this.document, itemElement, './enclosure')) {
769 try { 785 try {
783 let title; 799 let title;
784 let subtitle; 800 let subtitle;
785 let logo; 801 let logo;
786 let entries = []; 802 let entries = [];
787 let documentElement = this.document.documentElement; 803 let documentElement = this.document.documentElement;
788 let titleElement = feedQueryXPath(this.document, documentElement, 804 let titleElement;
789 './channel/title'); 805 let descriptionElement;
806
807
808 titleElement = feedQueryXPath(this.document, documentElement,
809 './channel/dc:title');
810 if (titleElement === null) {
811 titleElement = feedQueryXPath(this.document, documentElement,
812 './channel/title');
813 }
790 if (titleElement !== null) { 814 if (titleElement !== null) {
791 title = titleElement.textContent; 815 title = titleElement.textContent;
792 } 816 }
793 817
794 let descriptionElement = feedQueryXPath(this.document, documentElement, 818 descriptionElement = feedQueryXPath(this.document, documentElement,
795 './channel/description'); 819 './channel/dc:description');
820 if (descriptionElement === null) {
821 descriptionElement = feedQueryXPath(this.document, documentElement,
822 './channel/description');
823 }
796 if (descriptionElement !== null) { 824 if (descriptionElement !== null) {
797 subtitle = descriptionElement.textContent; 825 subtitle = descriptionElement.textContent;
798 } 826 }
799 827
800 let imageElement = feedQueryXPath(this.document, documentElement, 828 let imageElement = feedQueryXPath(this.document, documentElement,