Mercurial > addons > firefox-addons > feed-preview
comparison js/feed-parser.js @ 6:5d7c13e998e9
Create feed previews using a stream filter
Instead of replacing the feed document with an XHTML preview from a content
script after it has already been rendered, create an XHTML preview using a
stream filter before it is passed into the rendering engine and use an XSL
style sheet to convert it to HTML. This has two advantages, firstly it
results in an HTMLDocument with the full HTML DOM available and secondly it
avoids rendering the document twice.
Refactor the feed preview creation and split parsing and rendering into
separate modules.
author | Guido Berhoerster <guido+feed-preview@berhoerster.name> |
---|---|
date | Thu, 08 Nov 2018 16:30:34 +0100 |
parents | |
children | 2bbb7617dd13 |
comparison
equal
deleted
inserted
replaced
5:341a0f4b7ce0 | 6:5d7c13e998e9 |
---|---|
/*
 * Copyright (C) 2018 Guido Berhoerster <guido+feed-preview@berhoerster.name>
 *
 * This Source Code Form is subject to the terms of the Mozilla Public
 * License, v. 2.0. If a copy of the MPL was not distributed with this
 * file, You can obtain one at http://mozilla.org/MPL/2.0/.
 */
8 | |
'use strict';

/**
 * XML namespace URIs of the feed formats this module recognizes by namespace.
 * Frozen so that importing modules cannot alter the shared table.
 */
export const XMLNS = Object.freeze({
    ATOM10: 'http://www.w3.org/2005/Atom',
    RSS09: 'http://my.netscape.com/rdf/simple/0.9/',
});

/** URL schemes that parseURL() accepts; any other scheme is rejected. */
const ALLOWED_LINK_PROTOCOLS = new Set(['http:', 'https:', 'ftp:']);
16 | |
/**
 * Escape the characters `<`, `>`, `&`, `'` and `"` in a string with their
 * predefined XML entities so the result can be embedded in markup as text.
 *
 * @param {string} str - text to escape
 * @returns {string} the escaped text
 */
function encodeXML(str) {
    // the character class below matches exactly the keys of this table
    const entities = {
        '<': '&lt;',
        '>': '&gt;',
        '&': '&amp;',
        '\'': '&apos;',
        '"': '&quot;',
    };

    return str.replace(/[<>&'"]/g, (c) => entities[c]);
}
28 | |
/**
 * Parse a date string with the built-in Date parser.
 *
 * @param {string} s - textual date taken from a feed
 * @returns {Date} the parsed date, or the epoch (`new Date(0)`) if the string
 * cannot be parsed
 */
function parseDate(s) {
    const date = new Date(s);

    // an unparsable string yields an "Invalid Date" whose time value is NaN
    return Number.isNaN(date.getTime()) ? new Date(0) : date;
}
34 | |
/**
 * Parse a URL reference and accept it only if its scheme is listed in
 * ALLOWED_LINK_PROTOCOLS.
 *
 * @param {?string} text - absolute or relative URL reference
 * @param {string|URL} [baseURL=''] - base against which a relative reference
 * is resolved; an empty, null or undefined base means "no base"
 * @returns {?URL} the parsed URL, or null if `text` is missing, cannot be
 * parsed, or uses a scheme that is not allowed
 */
function parseURL(text, baseURL = '') {
    // a missing value (e.g. an absent attribute) would otherwise be
    // stringified to "null" and resolved as a relative reference
    if (text === null || typeof text === 'undefined') {
        return null;
    }

    // the URL constructor rejects an unparsable base even when the input is
    // an absolute URL, so an empty base must not be passed on
    const hasBase = baseURL !== null && baseURL !== '';
    let url;

    try {
        url = hasBase ? new URL(text, baseURL) : new URL(text);
    } catch (e) {
        return null;
    }
    if (!ALLOWED_LINK_PROTOCOLS.has(url.protocol)) {
        return null;
    }

    return url;
}
49 | |
/**
 * Namespace resolver for the XPath queries of this module: maps the prefixes
 * `atom` and `rss` to the corresponding XMLNS entries.
 *
 * @param {string} prefix - namespace prefix used in an XPath expression
 * @returns {?string} the namespace URI, or null for any other prefix
 */
function feedNSResolver(prefix) {
    if (prefix === 'atom') {
        return XMLNS.ATOM10;
    }
    if (prefix === 'rss') {
        return XMLNS.RSS09;
    }

    return null;
}
59 | |
/**
 * Evaluate an XPath expression and return the first matching node in
 * document order.
 *
 * @param {Document} feedDocument - document used to evaluate the expression
 * @param {Node} scopeElement - context node of the expression
 * @param {string} xpathQuery - XPath expression; may use the prefixes known
 * to feedNSResolver()
 * @returns {?Node} the `singleNodeValue` of the XPath result
 */
function feedQueryXPath(feedDocument, scopeElement, xpathQuery) {
    const result = feedDocument.evaluate(xpathQuery, scopeElement,
        feedNSResolver, XPathResult.FIRST_ORDERED_NODE_TYPE, null);

    return result.singleNodeValue;
}
64 | |
/**
 * Evaluate an XPath expression and collect every matching node in document
 * order.
 *
 * @param {Document} feedDocument - document used to evaluate the expression
 * @param {Node} scopeElement - context node of the expression
 * @param {string} xpathQuery - XPath expression; may use the prefixes known
 * to feedNSResolver()
 * @returns {Node[]} the matching nodes; empty if there are none
 */
function feedQueryXPathAll(feedDocument, scopeElement, xpathQuery) {
    const result = feedDocument.evaluate(xpathQuery, scopeElement,
        feedNSResolver, XPathResult.ORDERED_NODE_ITERATOR_TYPE, null);
    const nodes = [];

    // iterateNext() signals the end of the result set with null
    let node = result.iterateNext();
    while (node !== null) {
        nodes.push(node);
        node = result.iterateNext();
    }

    return nodes;
}
76 | |
/**
 * Error thrown when the feed source cannot be parsed as XML.
 */
export class ParserError extends Error {
    /**
     * @param {...*} params - arguments passed through to the Error constructor
     */
    constructor(...params) {
        super(...params);
        // report the name of the concrete class, including subclasses
        this.name = this.constructor.name;
    }
}
83 | |
/**
 * Error thrown when a document is well-formed XML but not a supported feed.
 */
export class UnsupportedFeedTypeError extends Error {
    /**
     * @param {string} [message] - error message
     * @param {...*} params - further arguments passed to the Error constructor
     */
    constructor(message = 'Document is not a supported feed', ...params) {
        super(message, ...params);
        // report the name of the concrete class, including subclasses
        this.name = this.constructor.name;
    }
}
90 | |
/**
 * Error describing a failed transfer of a URL.
 */
export class ProtocolError extends Error {
    /**
     * @param {string|URL} url - URL whose transfer failed
     * @param {number} status - status code of the failed transfer
     * @param {string} statusText - status text of the failed transfer
     * @param {...*} params - further arguments passed to the Error constructor
     */
    constructor(url, status, statusText, ...params) {
        const message = `Protocol error: Transfer of ${url} failed with: ` +
            `${status} ${statusText}`;
        super(message, ...params);
        // report the name of the concrete class, including subclasses
        this.name = this.constructor.name;
        this.url = url;
        this.status = status;
        this.statusText = statusText;
    }
}
102 | |
/**
 * Logo image of a feed.
 */
class FeedLogo {
    /**
     * @param {URL} url - location of the logo image
     * @param {Object} [options]
     * @param {string} [options.title=''] - title of the logo
     */
    constructor(url, {title = ''} = {}) {
        this.url = url;
        this.title = title;
    }
}
109 | |
/**
 * File attached to a feed entry.
 *
 * Assigning `url` also derives `filename` from the last path segment of the
 * URL.
 */
class FeedEntryFile {
    /**
     * @param {URL} url - location of the file; its `pathname` is read
     * @param {Object} [options]
     * @param {string} [options.type] - type of the file; defaults to the
     * localized message `defaultFileType`
     * @param {number} [options.size=0] - size of the file
     */
    constructor(url, {type = browser.i18n.getMessage('defaultFileType'),
        size = 0} = {}) {
        // declare the fields first so that the property order is stable
        this.filename = undefined;
        this._url = undefined;
        this.url = url;
        this.type = type;
        this.size = size;
    }

    set url(url) {
        this._url = url;
        const lastSegment = url.pathname.split('/').pop();
        // a path ending in "/" has no file name; use the localized default
        this.filename = lastSegment !== '' ? lastSegment :
            browser.i18n.getMessage('defaultFileName');
    }

    get url() {
        return this._url;
    }
}
131 | |
/**
 * Single entry of a feed.
 *
 * Assigning `content` passes the value through normalizeContent() before it
 * is stored.
 */
class FeedEntry {
    /**
     * @param {Object} [options]
     * @param {string} [options.title] - entry title; defaults to the
     * localized message `defaultFeedEntryTitle`
     * @param {URL} [options.link] - link of the entry
     * @param {Date} [options.date] - entry date; defaults to the epoch
     * @param {string} [options.content=''] - HTML markup of the entry
     * @param {FeedEntryFile[]} [options.files=[]] - attached files
     */
    constructor({title = browser.i18n.getMessage('defaultFeedEntryTitle'),
        link = undefined, date = new Date(0), content = '',
        files = []} = {}) {
        this.title = title;
        this.link = link;
        this.date = date;
        this._content = undefined;
        this.content = content;
        this.files = files;
    }

    /**
     * Parse markup as HTML and serialize the resulting document with
     * XMLSerializer.
     *
     * @param {string|undefined} text - HTML markup
     * @returns {string|undefined} the serialized document, or undefined if
     * `text` is undefined
     */
    normalizeContent(text) {
        if (typeof text === 'undefined') {
            return;
        }

        const contentDocument = document.implementation.createHTMLDocument();
        const parsedDocument = new DOMParser().parseFromString(text,
            'text/html');
        // move the parsed body into the freshly created document
        contentDocument.body = contentDocument.adoptNode(parsedDocument.body);

        return new XMLSerializer().serializeToString(contentDocument);
    }

    set content(content) {
        this._content = this.normalizeContent(content);
    }

    get content() {
        return this._content;
    }
}
163 | |
/**
 * Parsed feed: its metadata and its entries.
 */
class Feed {
    /**
     * @param {string|URL} url - location of the feed
     * @param {Object} [options]
     * @param {string} [options.title] - feed title; defaults to the localized
     * message `defaultFeedTitle`
     * @param {string} [options.subtitle=''] - feed subtitle
     * @param {FeedLogo} [options.logo] - feed logo
     * @param {FeedEntry[]} [options.entries=[]] - feed entries
     */
    constructor(url, {title = browser.i18n.getMessage('defaultFeedTitle'),
        subtitle = '', logo, entries = []} = {}) {
        this.url = url;
        this.title = title;
        this.subtitle = subtitle;
        this.logo = logo;
        this.entries = entries;
    }
}
174 | |
/**
 * Parser that turns the XML source of an Atom 1.0 or RSS feed into a Feed
 * object.
 */
export class FeedParser {
    /**
     * Determine the feed format of a parsed XML document from its root
     * element.
     *
     * @param {Document} feedDocument - parsed XML document
     * @returns {Array} `[type, version]` where type is `'atom'` or `'rss'`,
     * or `[undefined, undefined]` if the document is not a supported feed
     */
    static probeFeed(feedDocument) {
        const documentElement = feedDocument.documentElement;

        if (documentElement.nodeName === 'feed' &&
            documentElement.namespaceURI === XMLNS.ATOM10) {
            // a missing version attribute is treated as version 1.0
            let version = documentElement.getAttribute('version');
            if (version === null) {
                version = '1.0';
            }
            if (version === '1.0') {
                return ['atom', version];
            }
        } else if (documentElement.nodeName === 'rss') {
            const version = documentElement.getAttribute('version');
            switch (version) {
                case '0.90':
                case '0.91':
                case '0.92':
                case '0.93':
                case '0.94':
                case '2.0':
                    return ['rss', version];
            }
        } else if (documentElement.localName.toLowerCase() === 'rdf' &&
            documentElement.getAttribute('xmlns') === XMLNS.RSS09) {
            return ['rss', '0.9'];
        }

        return [undefined, undefined];
    }

    constructor() {
        // both are set by parseFromString() before any parse* method runs
        this.url = undefined;
        this.document = undefined;
    }

    /**
     * Build a FeedLogo from an Atom `<logo>` element.
     *
     * @param {Element} logoElement
     * @returns {FeedLogo}
     * @throws {TypeError} if the element text is not an acceptable URL
     */
    parseAtomLogo(logoElement) {
        const url = parseURL(logoElement.textContent.trim(), this.url);
        if (url === null) {
            throw new TypeError('invalid URL in <logo> element');
        }

        return new FeedLogo(url);
    }

    /**
     * Build a FeedEntry from an Atom `<entry>` element.
     *
     * @param {Element} entryElement
     * @returns {FeedEntry}
     */
    parseAtomEntry(entryElement) {
        let title;
        let link;
        let date;
        let content;

        const titleElement = feedQueryXPath(this.document, entryElement,
            './atom:title');
        if (titleElement !== null) {
            title = titleElement.textContent.trim();
        }

        // RFC 4287 section 4.2.7.2: a link element without a rel attribute
        // is to be interpreted as rel="alternate"
        const linkElement = feedQueryXPath(this.document, entryElement,
            './atom:link[@href][not(@rel) or @rel="alternate"]');
        if (linkElement !== null) {
            link = parseURL(linkElement.getAttribute('href'), this.url);
        }

        const updatedElement = feedQueryXPath(this.document, entryElement,
            './atom:updated');
        if (updatedElement !== null) {
            date = parseDate(updatedElement.textContent);
        }

        // prefer the full content and fall back to the summary
        let contentElement = feedQueryXPath(this.document, entryElement,
            './atom:content');
        if (contentElement === null) {
            contentElement = feedQueryXPath(this.document, entryElement,
                './atom:summary');
        }
        if (contentElement !== null) {
            let contentType = contentElement.getAttribute('type');
            if (contentType === null) {
                contentType = 'text';
            }
            contentType = contentType.toLowerCase();
            if (contentType === 'xhtml') {
                // markup is embedded as child elements
                content = contentElement.innerHTML;
            } else if (contentType === 'html') {
                // markup is embedded as escaped text
                content = contentElement.textContent;
            } else {
                // anything else is shown as preformatted plain text
                const encodedContent =
                    encodeXML(contentElement.textContent.trim());
                content = `<pre>${encodedContent}</pre>`;
            }
        }

        return new FeedEntry({title, link, date, content});
    }

    /**
     * Build a Feed from the Atom document in `this.document`.
     *
     * @returns {Feed}
     */
    parseAtomFeed() {
        let title;
        let subtitle;
        let logo;
        const entries = [];
        const documentElement = this.document.documentElement;

        const titleElement = feedQueryXPath(this.document, documentElement,
            './atom:title');
        if (titleElement !== null) {
            title = titleElement.textContent.trim();
        }

        const subtitleElement = feedQueryXPath(this.document, documentElement,
            './atom:subtitle');
        if (subtitleElement !== null) {
            subtitle = subtitleElement.textContent.trim();
        }

        const logoElement = feedQueryXPath(this.document, documentElement,
            './atom:logo');
        if (logoElement !== null) {
            try {
                logo = this.parseAtomLogo(logoElement);
            } catch (e) {
                // an unusable logo is dropped; anything else is unexpected
                if (!(e instanceof TypeError)) {
                    throw e;
                }
            }
        }

        const entryElements = feedQueryXPathAll(this.document,
            documentElement, './atom:entry');
        for (const entryElement of entryElements) {
            entries.push(this.parseAtomEntry(entryElement));
        }

        return new Feed(this.url, {title, subtitle, logo, entries});
    }

    /**
     * Build a FeedLogo from an RSS 0.9 `<image>` element.
     *
     * @param {Element} imageElement
     * @returns {FeedLogo}
     * @throws {TypeError} if the `<url>` child is missing or not an
     * acceptable URL
     */
    parseRSS1Logo(imageElement) {
        let title;

        const urlElement = feedQueryXPath(this.document, imageElement,
            './rss:url');
        if (urlElement === null) {
            throw new TypeError('missing <url> element in <logo> element');
        }
        const url = parseURL(urlElement.textContent.trim(), this.url);
        if (url === null) {
            throw new TypeError('invalid URL in <logo> element');
        }

        const titleElement = feedQueryXPath(this.document, imageElement,
            './rss:title');
        if (titleElement !== null) {
            title = titleElement.textContent.trim();
        }

        return new FeedLogo(url, {title});
    }

    /**
     * Build a FeedEntry from an RSS 0.9 `<item>` element.
     *
     * @param {Element} itemElement
     * @returns {FeedEntry}
     */
    parseRSS1Entry(itemElement) {
        let title;
        let link;

        const titleElement = feedQueryXPath(this.document, itemElement,
            './rss:title');
        if (titleElement !== null) {
            title = titleElement.textContent;
        }

        const linkElement = feedQueryXPath(this.document, itemElement,
            './rss:link');
        if (linkElement !== null) {
            link = parseURL(linkElement.textContent, this.url);
        }

        return new FeedEntry({title, link});
    }

    /**
     * Build a Feed from the RSS 0.9 document in `this.document`.
     *
     * @returns {Feed}
     */
    parseRSS1Feed() {
        let title;
        let subtitle;
        let logo;
        const entries = [];
        const documentElement = this.document.documentElement;

        const titleElement = feedQueryXPath(this.document, documentElement,
            './rss:channel/rss:title');
        if (titleElement !== null) {
            title = titleElement.textContent;
        }

        // the elements live in the RSS 0.9 namespace, so every step of the
        // path needs the rss: prefix in order to match
        const descriptionElement = feedQueryXPath(this.document,
            documentElement, './rss:channel/rss:description');
        if (descriptionElement !== null) {
            subtitle = descriptionElement.textContent;
        }

        const imageElement = feedQueryXPath(this.document, documentElement,
            './rss:image');
        if (imageElement !== null) {
            try {
                logo = this.parseRSS1Logo(imageElement);
            } catch (e) {
                // an unusable logo is dropped; anything else is unexpected
                if (!(e instanceof TypeError)) {
                    throw e;
                }
            }
        }

        const itemElements = feedQueryXPathAll(this.document, documentElement,
            './rss:item');
        for (const itemElement of itemElements) {
            const entry = this.parseRSS1Entry(itemElement);
            if (typeof entry !== 'undefined') {
                entries.push(entry);
            }
        }

        return new Feed(this.url, {title, subtitle, logo, entries});
    }

    /**
     * Build a FeedLogo from an RSS `<image>` element.
     *
     * @param {Element} imageElement
     * @returns {FeedLogo}
     * @throws {TypeError} if the `<url>` child is missing or not an
     * acceptable URL
     */
    parseRSS2Logo(imageElement) {
        let title;

        const urlElement = feedQueryXPath(this.document, imageElement,
            './url');
        if (urlElement === null) {
            throw new TypeError('missing <url> element in <logo> element');
        }
        const url = parseURL(urlElement.textContent.trim(), this.url);
        if (url === null) {
            throw new TypeError('invalid URL in <logo> element');
        }

        const titleElement = feedQueryXPath(this.document, imageElement,
            './title');
        if (titleElement !== null) {
            title = titleElement.textContent.trim();
        }

        return new FeedLogo(url, {title});
    }

    /**
     * Build a FeedEntryFile from an RSS `<enclosure>` element.
     *
     * @param {Element} enclosureElement
     * @returns {FeedEntryFile}
     * @throws {TypeError} if the `url` attribute is missing or not an
     * acceptable URL
     */
    parseRSS2EntryFile(enclosureElement) {
        let type;
        let size;

        // getAttribute() returns null for a missing attribute, which must
        // not be resolved against the feed URL as the string "null"
        const urlAttribute = enclosureElement.getAttribute('url');
        if (urlAttribute === null) {
            throw new TypeError('invalid URL in <enclosure> element');
        }
        const url = parseURL(urlAttribute, this.url);
        if (url === null) {
            throw new TypeError('invalid URL in <enclosure> element');
        }

        const typeAttribute = enclosureElement.getAttribute('type');
        if (typeAttribute !== null) {
            type = typeAttribute;
        }

        const length = Number.parseInt(
            enclosureElement.getAttribute('length'), 10);
        if (!Number.isNaN(length)) {
            size = length;
        }

        return new FeedEntryFile(url, {type, size});
    }

    /**
     * Build a FeedEntry from an RSS `<item>` element.
     *
     * @param {Element} itemElement
     * @returns {FeedEntry}
     */
    parseRSS2Entry(itemElement) {
        let title;
        let link;
        let date;
        let content;
        const files = [];

        const titleElement = feedQueryXPath(this.document, itemElement,
            './title');
        if (titleElement !== null) {
            title = titleElement.textContent;
        }

        const linkElement = feedQueryXPath(this.document, itemElement,
            './link');
        if (linkElement !== null) {
            link = parseURL(linkElement.textContent, this.url);
        }

        const pubDateElement = feedQueryXPath(this.document, itemElement,
            './pubDate');
        if (pubDateElement !== null) {
            date = parseDate(pubDateElement.textContent);
        }

        const descriptionElement = feedQueryXPath(this.document, itemElement,
            './description');
        if (descriptionElement !== null) {
            content = descriptionElement.textContent.trim();
        }

        const enclosureElements = feedQueryXPathAll(this.document,
            itemElement, './enclosure');
        for (const enclosureElement of enclosureElements) {
            try {
                files.push(this.parseRSS2EntryFile(enclosureElement));
            } catch (e) {
                // an unusable enclosure is skipped; anything else is
                // unexpected
                if (!(e instanceof TypeError)) {
                    throw e;
                }
            }
        }

        return new FeedEntry({title, link, date, content, files});
    }

    /**
     * Build a Feed from the RSS document in `this.document`.
     *
     * @returns {Feed}
     */
    parseRSS2Feed() {
        let title;
        let subtitle;
        let logo;
        const entries = [];
        const documentElement = this.document.documentElement;

        const titleElement = feedQueryXPath(this.document, documentElement,
            './channel/title');
        if (titleElement !== null) {
            title = titleElement.textContent;
        }

        const descriptionElement = feedQueryXPath(this.document,
            documentElement, './channel/description');
        if (descriptionElement !== null) {
            subtitle = descriptionElement.textContent;
        }

        const imageElement = feedQueryXPath(this.document, documentElement,
            './channel/image');
        if (imageElement !== null) {
            try {
                logo = this.parseRSS2Logo(imageElement);
            } catch (e) {
                // an unusable logo is dropped; anything else is unexpected
                if (!(e instanceof TypeError)) {
                    throw e;
                }
            }
        }

        const itemElements = feedQueryXPathAll(this.document, documentElement,
            './channel/item');
        for (const itemElement of itemElements) {
            const entry = this.parseRSS2Entry(itemElement);
            if (typeof entry !== 'undefined') {
                entries.push(entry);
            }
        }

        return new Feed(this.url, {title, subtitle, logo, entries});
    }

    /**
     * Parse the XML source of a feed.
     *
     * @param {string} xmlString - XML source of the feed
     * @param {string|URL} url - location of the feed; used as the base for
     * relative URLs inside the feed
     * @returns {Feed}
     * @throws {ParserError} if the source is not well-formed XML
     * @throws {UnsupportedFeedTypeError} if the document is not a supported
     * feed
     */
    parseFromString(xmlString, url) {
        this.url = url;
        this.document = new DOMParser().parseFromString(xmlString,
            'application/xml');
        // DOMParser reports XML errors through a <parsererror> document
        // instead of throwing
        if (this.document.documentElement.nodeName.toLowerCase() ===
            'parsererror') {
            throw new ParserError(this.document.documentElement.textContent);
        }

        const [type, version] = this.constructor.probeFeed(this.document);
        if (type === 'atom') {
            return this.parseAtomFeed();
        }
        if (type === 'rss') {
            return version === '0.9' ?
                this.parseRSS1Feed() :
                this.parseRSS2Feed();
        }

        throw new UnsupportedFeedTypeError();
    }
}