Mercurial > addons > firefox-addons > feed-preview
comparison js/feed-parser.js @ 15:150f07c7595f
Add support for Atom 0.3 feeds
author | Guido Berhoerster <guido+feed-preview@berhoerster.name> |
---|---|
date | Mon, 10 Dec 2018 23:24:36 +0100 |
parents | 376a0e415bba |
children | 48cabd01ef64 |
Comparison legend: equal | deleted | inserted | replaced
14:376a0e415bba | 15:150f07c7595f |
---|---|
7 */ | 7 */ |
8 | 8 |
9 'use strict'; | 9 'use strict'; |
10 | 10 |
11 export const XMLNS = { | 11 export const XMLNS = { |
12 ATOM03: 'http://purl.org/atom/ns#', | |
12 ATOM10: 'http://www.w3.org/2005/Atom', | 13 ATOM10: 'http://www.w3.org/2005/Atom', |
13 RSS09: 'http://my.netscape.com/rdf/simple/0.9/', | 14 RSS09: 'http://my.netscape.com/rdf/simple/0.9/', |
14 XHTML: 'http://www.w3.org/1999/xhtml' | 15 XHTML: 'http://www.w3.org/1999/xhtml', |
16 PARSERERROR: 'http://www.mozilla.org/newlayout/xml/parsererror.xml' | |
15 } | 17 } |
16 const ALLOWED_LINK_PROTOCOLS = new Set(['http:', 'https:', 'ftp:']); | 18 const ALLOWED_LINK_PROTOCOLS = new Set(['http:', 'https:', 'ftp:']); |
17 | 19 |
18 function encodeXML(str) { | 20 function encodeXML(str) { |
19 return str.replace(/[<>&'"]/g, c => { | 21 return str.replace(/[<>&'"]/g, c => { |
46 } | 48 } |
47 | 49 |
48 return url; | 50 return url; |
49 } | 51 } |
50 | 52 |
53 function base64Decode(base64Str) { | |
54 let encodedText; | |
55 try { | |
56 encodedText = atob(base64Str); | |
57 } catch (e) { | |
58 throw (e instanceof DOMException) ? new TypeError(e.message) : e; | |
59 } | |
60 let byteBuffer = new Uint8Array(new ArrayBuffer(encodedText.length)); | |
61 for (let i = 0; i < encodedText.length; i++) { | |
62 byteBuffer[i] = encodedText.charCodeAt(i); | |
63 } | |
64 return new TextDecoder().decode(byteBuffer); | |
65 } | |
66 | |
51 function feedNSResolver(prefix) { | 67 function feedNSResolver(prefix) { |
52 switch (prefix) { | 68 switch (prefix) { |
69 case 'atom03': | |
70 return XMLNS.ATOM03; | |
53 case 'atom': | 71 case 'atom': |
54 return XMLNS.ATOM10; | 72 return XMLNS.ATOM10; |
55 case 'rss': | 73 case 'rss': |
56 return XMLNS.RSS09; | 74 return XMLNS.RSS09; |
57 } | 75 } |
175 | 193 |
176 export class FeedParser { | 194 export class FeedParser { |
177 static probeFeed(feedDocument) { | 195 static probeFeed(feedDocument) { |
178 let documentElement = feedDocument.documentElement; | 196 let documentElement = feedDocument.documentElement; |
179 if (documentElement.nodeName === 'feed' && | 197 if (documentElement.nodeName === 'feed' && |
198 documentElement.namespaceURI === XMLNS.ATOM03) { | |
199 return ['atom', '0.3']; | |
200 } else if (documentElement.nodeName === 'feed' && | |
180 documentElement.namespaceURI === XMLNS.ATOM10) { | 201 documentElement.namespaceURI === XMLNS.ATOM10) { |
181 let version = documentElement.getAttribute('version'); | 202 return ['atom', '1.0']; |
182 if (version === null) { | |
183 version = '1.0'; | |
184 } | |
185 if (version === '1.0') { | |
186 return ['atom', version]; | |
187 } | |
188 } else if (documentElement.nodeName === 'rss') { | 203 } else if (documentElement.nodeName === 'rss') { |
189 let version = documentElement.getAttribute('version'); | 204 let version = documentElement.getAttribute('version'); |
190 switch (version) { | 205 switch (version) { |
191 case '0.90': | 206 case '0.90': |
192 case '0.91': | 207 case '0.91': |
207 constructor() { | 222 constructor() { |
208 this.url = undefined; | 223 this.url = undefined; |
209 this.document = undefined; | 224 this.document = undefined; |
210 } | 225 } |
211 | 226 |
227 parseAtom03ContentConstruct(containerElement, textOnly = true) { | |
228 let contentType = containerElement.getAttribute('type'); | |
229 let contentMode = containerElement.getAttribute('mode'); | |
230 if (contentType === null) { | |
231 contentType = 'text/plain'; | |
232 } | |
233 if (contentMode === null) { | |
234 contentMode = 'xml'; | |
235 } | |
236 if (contentType === 'application/xhtml+xml') { | |
237 let htmlText; | |
238 if (contentMode === 'xml') { | |
239 return textOnly ? containerElement.textContent.trim() : | |
240 containerElement.innerHTML; | |
241 } else if (contentMode === 'escaped') { | |
242 htmlText = containerElement.textContent; | |
243 } else if (contentMode === 'base64') { | |
244 htmlText = base64Decode(containerElement.textContent); | |
245 } | |
246 if (typeof htmlText === 'undefined') { | |
247 return; | |
248 } | |
249 if (textOnly) { | |
250 let htmlDocument = new DOMParser().parseFromString(htmlText, | |
251 'application/xhtml+xml'); | |
252 if (htmlDocument.documentElement.namespaceURI === | |
253 XMLNS.PARSERERROR) { | |
254 return; | |
255 } | |
256 return htmlDocument.body.textContent.trim(); | |
257 } | |
258 return htmlText; | |
259 } else if (contentType === 'text/html') { | |
260 let htmlText; | |
261 if (contentMode === 'escaped') { | |
262 htmlText = containerElement.textContent; | |
263 } else if (contentMode === 'base64') { | |
264 htmlText = base64Decode(containerElement.textContent); | |
265 } | |
266 if (typeof htmlText === 'undefined') { | |
267 return; | |
268 } | |
269 if (textOnly) { | |
270 let htmlDocument = new DOMParser().parseFromString(htmlText, | |
271 'text/html'); | |
272 return htmlDocument.body.textContent.trim(); | |
273 } | |
274 return htmlText; | |
275 } else if (contentType === 'text/plain') { | |
276 let text; | |
277 if (contentMode === 'escaped') { | |
278 text = containerElement.textContent; | |
279 } else if (contentMode === 'base64') { | |
280 text = base64Decode(containerElement.textContent); | |
281 } | |
282 if (typeof text === 'undefined') { | |
283 return; | |
284 } | |
285 return textOnly ? text : `<pre>${encodeXML(text)}</pre>`; | |
286 } | |
287 return; | |
288 } | |
289 | |
290 parseAtom03Content(contentElement) { | |
291 // ordered from lowest to highest preference | |
292 const contentTypes = [ | |
293 'text/plain', | |
294 'text/html', | |
295 'application/xhtml+xml' | |
296 ]; | |
297 if (contentElement.getAttribute('type') === 'multipart/alternative' && | |
298 contentElement.getAttribute('mode') === null) { | |
299 // select alternative according to above preference | |
300 let selectedTypeIndex = -1; | |
301 let selectedElement; | |
302 for (let innerContentElement of contentElement.children) { | |
303 if (innerContentElement.localName !== 'content' || | |
304 innerContentElement.namespaceURI !== XMLNS.ATOM03) { | |
305 throw new TypeError('child elements of a multipart ' + | |
306 ' content elements must be content elements'); | |
307 } | |
308 let innerContentType = innerContentElement.getAttribute('type'); | |
309 if (innerContentType === null) { | |
310 innerContentType = 'text/plain'; | |
311 } | |
312 let typeIndex = contentTypes.indexOf(innerContentType); | |
313 if (typeIndex > selectedTypeIndex) { | |
314 selectedTypeIndex = typeIndex; | |
315 selectedElement = innerContentElement; | |
316 } | |
317 } | |
318 if (selectedTypeIndex >= 0) { | |
319 contentElement = selectedElement; | |
320 } | |
321 } | |
322 | |
323 return this.parseAtom03ContentConstruct(contentElement, false); | |
324 } | |
325 | |
326 parseAtom03Entry(entryElement) { | |
327 let title; | |
328 let link; | |
329 let date; | |
330 let content; | |
331 let titleElement = feedQueryXPath(this.document, entryElement, | |
332 './atom03:title'); | |
333 if (titleElement !== null) { | |
334 title = titleElement.textContent.trim(); | |
335 } | |
336 | |
337 let linkElement = feedQueryXPath(this.document, entryElement, | |
338 './atom03:link[@href][@rel="alternate"]'); | |
339 if (linkElement !== null) { | |
340 link = parseURL(linkElement.getAttribute('href'), this.url); | |
341 } | |
342 | |
343 let modifiedElement = feedQueryXPath(this.document, entryElement, | |
344 './atom03:modified'); | |
345 if (modifiedElement !== null) { | |
346 date = parseDate(modifiedElement.textContent); | |
347 } | |
348 | |
349 let contentElement = feedQueryXPath(this.document, entryElement, | |
350 './atom03:content'); | |
351 if (contentElement !== null) { | |
352 try { | |
353 content = this.parseAtom03Content(contentElement); | |
354 } catch (e) { | |
355 if (!(e instanceof TypeError)) { | |
356 throw e; | |
357 } | |
358 } | |
359 } | |
360 if (typeof content === 'undefined') { | |
361 let summaryElement = feedQueryXPath(this.document, entryElement, | |
362 './atom03:summary'); | |
363 if (summaryElement !== null) { | |
364 try { | |
365 content = this.parseAtom03ContentConstruct(summaryElement, | |
366 false); | |
367 } catch (e) { | |
368 if (!(e instanceof TypeError)) { | |
369 throw e; | |
370 } | |
371 } | |
372 } | |
373 } | |
374 | |
375 return new FeedEntry({title, link, date, content}); | |
376 } | |
377 | |
378 parseAtom03Feed() { | |
379 let title; | |
380 let subtitle; | |
381 let logo; | |
382 let entries = []; | |
383 let documentElement = this.document.documentElement; | |
384 | |
385 let titleElement = feedQueryXPath(this.document, documentElement, | |
386 './atom03:title'); | |
387 if (titleElement !== null) { | |
388 try { | |
389 title = this.parseAtom03ContentConstruct(titleElement); | |
390 } catch (e) { | |
391 if (!(e instanceof TypeError)) { | |
392 throw e; | |
393 } | |
394 } | |
395 } | |
396 | |
397 let taglineElement = feedQueryXPath(this.document, documentElement, | |
398 './atom03:tagline'); | |
399 if (taglineElement !== null) { | |
400 try { | |
401 title = this.parseAtom03ContentConstruct(taglineElement); | |
402 } catch (e) { | |
403 if (!(e instanceof TypeError)) { | |
404 throw e; | |
405 } | |
406 } | |
407 } | |
408 | |
409 let logoElement = feedQueryXPath(this.document, documentElement, | |
410 './atom03:logo'); | |
411 if (logoElement !== null) { | |
412 try { | |
413 logo = this.parseAtomLogo(logoElement); | |
414 } catch (e) { | |
415 if (!(e instanceof TypeError)) { | |
416 throw e; | |
417 } | |
418 } | |
419 } | |
420 | |
421 let entryElements = feedQueryXPathAll(this.document, documentElement, | |
422 './atom03:entry'); | |
423 for (let entryElement of entryElements) { | |
424 entries.push(this.parseAtom03Entry(entryElement)); | |
425 } | |
426 | |
427 return new Feed(this.url, {title, subtitle, logo, entries}); | |
428 } | |
429 | |
212 parseAtomLogo(logoElement) { | 430 parseAtomLogo(logoElement) { |
213 let url = parseURL(logoElement.textContent.trim(), this.url); | 431 let url = parseURL(logoElement.textContent.trim(), this.url); |
214 if (url === null) { | 432 if (url === null) { |
215 throw new TypeError('invalid URL in <logo> element'); | 433 throw new TypeError('invalid URL in <logo> element'); |
216 } | 434 } |
547 | 765 |
548 parseFromString(xmlString, url) { | 766 parseFromString(xmlString, url) { |
549 this.url = url; | 767 this.url = url; |
550 this.document = new DOMParser().parseFromString(xmlString, | 768 this.document = new DOMParser().parseFromString(xmlString, |
551 'application/xml'); | 769 'application/xml'); |
552 if (this.document.documentElement.nodeName.toLowerCase() === | 770 if (this.document.documentElement.namespaceURI === XMLNS.PARSERERROR) { |
553 'parsererror') { | |
554 throw new ParserError(this.document.documentElement.textContent); | 771 throw new ParserError(this.document.documentElement.textContent); |
555 } | 772 } |
556 | 773 |
557 let [type, version] = this.constructor.probeFeed(this.document); | 774 let [type, version] = this.constructor.probeFeed(this.document); |
558 if (type === 'atom') { | 775 if (type === 'atom') { |
559 return this.parseAtomFeed(); | 776 if (version === '0.3') { |
777 return this.parseAtom03Feed(); | |
778 } else if (version === '1.0') { | |
779 return this.parseAtomFeed(); | |
780 } | |
560 } else if (type === 'rss') { | 781 } else if (type === 'rss') { |
561 if (version === '0.9') { | 782 if (version === '0.9') { |
562 return this.parseRSS1Feed(); | 783 return this.parseRSS1Feed(); |
563 } else { | 784 } else { |
564 return this.parseRSS2Feed(); | 785 return this.parseRSS2Feed(); |