Archived
1
0
Disbranĉigi 0

Complete rewrite. Now functional, and significantly better! ☆

This commit is contained in:
Jaidyn Levesque 2019-07-09 23:22:02 -05:00
parent fc7c2f1e9c
commit bac107fb3f
13 changed files with 9269 additions and 192 deletions

28
README
View File

@ -1,28 +0,0 @@
================================================================================
RSSS : `RSSS SAGE` An RSS parser~
================================================================================
rsss is made to make your life (hopefully) a bit easier when you've got to deal
with RSS data.
It has functions for easily parsing both the overarching stucture, and the
individual <item>'s.
----------------------------------------
USAGE
----------------------------------------
You're probably interested in `rsss:feed-items`, which returns a list of every
item in the RSS feed, and the `rsss:title *data*`, `rsss:description`, etc.,
which fetch the traits of individual <item>'s or the overarching <channel>.
They're called like so: (function *data*), where *data* is the XML data.
----------------------------------------
BORING STUFF
----------------------------------------
License is in COPYING (GNU GPLv3)
Author is Jaidyn Ann <jadedctrl@teknik.io>
Sauce is at https://git.eunichx.us/rsss

44
README.txt Normal file
View File

@ -0,0 +1,44 @@
===============================================================================
RSSS An RSS/Atom parser
===============================================================================
Reading Syndicated Stuff Sagely is made to make your life (hopefully) a bit
easier when you've got to deal with Atom/RSS feed XML, especially when you need
to handle *both* types of feed.
It generalizes all types of feed (Atom, RSS 2.0/1.0) into a single RSSS:FEED
object, with subsequent RSSS:ENTRY objects inside for <entry>/<item>s within
the feed.
————————————————————————————————————————
USAGE
————————————————————————————————————————
You can turn a feed's XML (string) into an RSSS:FEED object with #'rsss:parse.
Then, you can read it by means of it's slots.
Slots of both FEEDs and ENTRYs:
* name
* date
* desc
* uri
Slots exclusively for FEEDs:
* entries
Slots exclusively for ENTRYs:
* author
* media
* text
Each slot has an accessor in the :rsss package, like #'rsss:media, etc.
Good luck!
————————————————————————————————————————
BORING STUFF
————————————————————————————————————————
License is in COPYING.txt (GNU GPLv3)
Author is Jaidyn Ann <jadedctrl@teknik.io>
Sauce is at https://git.eunichx.us/rsss.git

View File

@ -1,14 +1,9 @@
(defsystem "rsss"
:version "0.1"
:version "0.9"
:author "Jaidyn Ann <jadedctrl@teknik.io>"
:license "GPLv3"
:depends-on ("xmls")
:components ((:module "src"
:components
((:file "package")
(:file "main"))))
:components ((:file "rsss"))
:description
"RSS parser library.")
"Reading Syndicated Stuff Sagely is a feed format-neutral parser.
Both Atom and RSS-friendly.")

234
rsss.lisp Normal file
View File

@ -0,0 +1,234 @@
;; This program is free software: you can redistribute it and/or modify it
;; under the terms of the GNU General Public License as published by the
;; Free Software Foundation, either version 3 of the License, or (at your
;; option) any later version.
;; This program is distributed in the hope that it will be useful, but WITHOUT
;; ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
;; FITNESS FOR A PARTICULAR PURPOSE.
;; See the GNU General Public License for more details.
;; -----------------
(defpackage :rsss
(:use :cl)
(:export
:parse
:entries :name :uri :date :desc :author :text :media))
(in-package :rsss)
;; —————————————————————————————————————
;; CLASSES
;; —————————————————————————————————————
(defclass feed ()
((uri :initarg :uri :accessor uri :initform nil)
(name :initarg :name :accessor name :initform nil)
(date :initarg :date :accessor date :initform nil)
(desc :initarg :desc :accessor desc :initform nil)
(entries :initarg :entries :accessor entries :initform nil)))
(defclass entry ()
((uri :initarg :uri :accessor uri :initform nil)
(name :initarg :name :accessor name :initform nil)
(date :initarg :date :accessor date :initform nil)
(author :initarg :author :accessor author :initform nil)
(desc :initarg :desc :accessor desc :initform nil)
(media :initarg :media :accessor media :initform nil)
(text :initarg :text :accessor text :initform nil)))
;; —————————————————————————————————————
;; MISC
;; —————————————————————————————————————
(defmacro append-or-replace (list item)
"If a list is empty (nil), then replace it with a new list containing item.
Otherwise, append item to the pre-existing list.
Side-effectively, with nconc et. al."
`(if (nilp ,list)
(setf ,list (list ,item))
(nconc ,list (list ,item))))
;; VARYING LIST → LIST
(defun equ-assoc (item list)
"Run #'assoc, but with #'equal as the test function."
(assoc item list :test #'equal))
;; VARYING → BOOLEAN
(defun nilp (item)
"Return whether or note an item is eq to NIL."
(eq nil item))
;; LIST → VARYING
(defun ie-car (item)
"Try car'ing something… but don't sweat it if, y'know, it fucks."
(ignore-errors (car item)))
;; —————————————————————————————————————
;; PARSING
;; —————————————————————————————————————
;; STRING → RSSS:FEED
(defun parse (xml)
"Parse a given XML string (atom/rss[12]) into a FEED object."
(let* ((node (xmls:parse xml))
(type (feed-type node)))
(cond ((or (eq type :rss2) (eq type :rss1))
(parse-rss node))
((eq type :atom)
(parse-atom node)))))
;; -----------------
(defmacro common-let (node extra-let form &optional extra-form)
"A let-statement used by basically every parsing-function/macro."
`(let ((name (xmls:node-name ,node))
(chchild (xmls:node-children ,node))
(attrs (xmls:node-attrs ,node))
,@extra-let)
,form))
;; —————————————————
;; ATOM PARSING
;; —————————————————
(defmacro parse-atom-children (rsss parent-node child-node extra-cond)
"Code common to parsing both overarching Atom XML and individual entries."
`(mapcar
(lambda (,child-node)
(common-let ,child-node nil
(cond ((equal "link" name)
(setf (uri ,rsss) (cadr (equ-assoc "href" attrs))))
((equal "title" name)
(setf (name ,rsss) (car chchild)))
((equal "updated" name)
(setf (date ,rsss) (car chchild)))
((equal "summary" name)
(setf (desc ,rsss) (car chchild)))
,extra-cond)))
;; nil))
(xmls:node-children ,parent-node)))
;; -----------------
;; XMLS:NODE → RSSS:FEED
(defun parse-atom (atom-node)
"Parse Atom XMLS node into an rsss FEED object."
(let ((feed (make-instance 'feed)))
(parse-atom-children
feed atom-node atom-child
((equal "entry" name)
(append-or-replace (entries feed) (parse-atom-entry atom-child))))
feed))
;; XMLS:NODE → RSSS:ENTRY
(defun parse-atom-entry (entry-node)
"Parse an Atom <entry>'s XMLS:NODE into an RSSS:ENTRY object."
(let ((entry (make-instance 'entry)))
(parse-atom-children
entry entry-node entry-child
((equal "content" name)
(setf (text entry) (car chchild))))
entry))
;; —————————————————
;; RSS1/RSS2 PARSING
;; —————————————————
(defmacro parse-rss-children (rsss parent-node child-node
&optional (cond-1 `(T nil))
(cond-2 `(T nil))
(cond-3 `(T nil))
(cond-4 `(T nil)))
"Some code common to parsing the children of rss nodes."
`(mapcar
(lambda (,child-node)
(common-let ,child-node nil
(cond ((equal "title" name)
(setf (name ,rsss) (ie-car chchild)))
((equal "pubDate" name)
(setf (date ,rsss) (ie-car chchild)))
((equal "date" name)
(setf (date ,rsss) (ie-car chchild)))
((equal "link" name)
(setf (uri ,rsss) (ie-car chchild)))
,cond-1 ,cond-2 ,cond-3 ,cond-4)))
(xmls:node-children ,parent-node)))
;; -----------------
;; XMLS:NODE → RSSS:FEED
(defun parse-rss (rss-node)
"Parse an RSS XMLS node into an rsss:FEED object."
(let ((feed (make-instance 'feed)))
(mapcar
(lambda (rss-child)
(let ((name (xmls:node-name rss-child)))
(cond ((equal "channel" name)
(parse-rss-channel feed rss-child))
((equal "item" name)
(append-or-replace
(entries feed) (parse-rss-item rss-child))))))
(xmls:node-children rss-node))
feed))
;; RSSS:FEED XMLS:NODE → NIL
(defun parse-rss-channel (feed channel-node)
"Parse a channel node of an RSS feed; modifies the FEED object."
(parse-rss-children
feed channel-node channel-child
((equal "description" name)
(setf (desc feed) (ie-car chchild)))
((equal "item" name)
(append-or-replace (entries feed) (parse-rss-item channel-child))))
feed)
;; XMLS:NODE → RSSS:ENTRY
(defun parse-rss-item (entry-node)
"Parse an item (XMLS:NODE) of an RSS feed."
(let ((entry (make-instance 'entry)))
(parse-rss-children
entry entry-node entry-child
((or (equal "content" name) (equal "encoded" name))
(setf (text entry) (ie-car chchild)))
;; about the following: people use <description> tags for both summaries
;; and for actual post-bodies. (wtf :/)
;; so, if the text is longer than 250 characters, it's *probably* not
;; a summary, but an actual post. then again, not all posts are *that*
;; long…
;; this is a hack that won't always be helpful or effective, it's the
;; best trade-off I could think of. sorry ♥
((equal "description" name)
(if (and (< 250 (length (ie-car chchild))) (not (text entry)))
(setf (text entry) (ie-car chchild))
(setf (desc entry) (ie-car chchild))))
((equal "enclosure" name)
(setf (media entry) (cadr (assoc "url" attrs :test #'equal))))
((or (equal "author" name) (equal "creator" name))
(setf (author entry) (ie-car chchild))))
entry))
;; —————————————————————————————————————
;; PRE-PARSING
;; —————————————————————————————————————
;; STRING → SYMBOL
(defun feed-type (node)
"Return the type of the feed-- :rss2 for RSS 2.0, :rss1 for RSS 1.0/other,
and :atom for (obviously!) Atom."
(let ((name (xmls:node-name node))
(attrs (xmls:node-attrs node)))
(cond ((and (equal "rss" name)
(equal "2.0" (cadr (assoc "version" attrs :test #'equal))))
:rss2)
((equal "rss" name)
:rss1)
((equal "feed" name)
:atom))))

Binary file not shown.

View File

@ -1,111 +0,0 @@
(in-package :rsss)
;; ----------------------------------------
;; FEED PARSING
;; ----------------------------------------
(defun feed-values (data value)
"Return all values from a feed matching a set query."
(if (getf-string data "channel")
(getf-strings (getf-string data "channel") value)
(getf-strings data value)))
(defun feed-value (data value)
"Return the first value from a feed matching a set query."
(car (feed-values data value)))
(defun feed-value-listless (data value)
"Return the first value from a feed matching a set query,
but as an isolated string."
(let ((result (feed-value data value)))
(if (listp result)
(car (last result))
result)))
(defun feed-items (data)
"Return a list of all RSS `<item>`s (articles)."
(feed-values data "item"))
(defun title (data)
"Return the title of a set of data.
Accepts an entire XML file (for the `<channel>`'s data)
or a single `<item>`."
(feed-value-listless data "title"))
(defun description (data)
"Return the description of a set of data.
Accepts an entire XML file (for the `<channel>`'s data)
or a single `<item>`."
(feed-value-listless data "description"))
(defun link (data)
"Return the link of a set of data.
Accepts an entire XML file (for the `<channel>`'s data)
or a single `<item>`."
(feed-value-listless data "link"))
(defun pubdate (data)
"Return the publish-date of a set of data.
Accepts an entire XML file (for the `<channel>`'s data)
or a single `<item>`."
(feed-value-listless data "pubDate"))
;; ----------------------------------------
;; MISC
;; ----------------------------------------
(defun getf-string (list string)
"Get an item from a list by an identifying string in `car`.
I.E., if the string is 'apple', the first sublist like this:
('apple' 1 2 3)
will be returned."
(car (getf-strings list string)))
(defun getf-strings (list string &optional (stack '()))
"Get items from list by an identifying string in `car`.
I.E., if the string is 'apple', any sublists like this:
('apple' 1 2 3)
will be returned."
;; just recurse through the list, adding each new matching
;; item to the `stack`
(if (and (< 0 (length list)) (listp list))
(if (ignore-errors
;; the item might not be a list; for our purposes, let's ignore that.
(equal
(car (car list)) ;; '( ( here ) )
string))
(getf-strings (cdr list) string (concatenate 'list stack (list (car list))))
(getf-strings (cdr list) string stack))
stack))

View File

@ -1,16 +0,0 @@
(defpackage :rsss
(:use :cl)
(:export
:feed-value
:feed-values
:feed-value-listless
:feed-items
:title
:description
:pubdate
:link
))
(in-package :rsss)

4717
t/atom.xml Normal file

File diff suppressed because one or more lines are too long

View File

@ -1,28 +0,0 @@
<?xml version="1.0" encoding="UTF-8" ?>
<rss version="2.0">
<channel>
<title>RSS Sage Emergency Broadcast</title>
<description>Your daily dose of RSS Sage tests!</description>
<link>https://rsss.eunichx.us</link>
<lastBuildDate>Mon, 28 Aug 2018 11:12:55 -0400 </lastBuildDate>
<pubDate>Tue, 29 Aug 2018 09:00:00 -0400</pubDate>
<item>
<title>Local Resident Takes a Nap</title>
<description>John was reported to be "tired" and "irritable" before his nap. Not so any more.</description>
<link>gopher://www.news.com.co.uk.cn/nap.cgi</link>
<guid isPermaLink="false"> 1102345</guid>
<pubDate>Mon, 28 Aug 2018 09:00:00 -0400</pubDate>
</item>
<item>
<title>London Bridge Has Burned Down</title>
<description>Oh no, not again! :(</description>
<link>https://londonbridge.co.uk.cn/bridge.php</link>
<guid isPermaLink="false"> 1102345</guid>
<pubDate>Tue, 29 Aug 2018 09:00:00 -0400</pubDate>
</item>
</channel>
</rss>

4109
t/rss1.xml Normal file

File diff suppressed because one or more lines are too long

161
t/rss2.xml Normal file
View File

@ -0,0 +1,161 @@
<?xml version="1.0" encoding="UTF-8"?>
<rss version="2.0"
xmlns:content="http://purl.org/rss/1.0/modules/content/"
xmlns:dc="http://purl.org/dc/elements/1.1/"
xmlns:itunes="http://www.itunes.com/dtds/podcast-1.0.dtd"
xmlns:trackback="http://madskills.com/public/xml/rss/module/trackback/">
<channel>
<title>Esperanto-USA member blogs</title>
<link>http://esperanto-usa.org/eusa/blogs/member-blogs.rss</link>
<description>Recent posts from Esperanto-USA members / Lastaj blogeroj fare de membroj de Esperanto-USA</description>
<pubDate>Tue, 09 Jul 2019 00:00:39 -0700</pubDate>
<item>
<title>Semajna kunveno por 2019-07-02</title>
<link>http://www.ekoci.org/2019/07/02/semajna-kunveno-por-2019-07-02/</link>
<description>Ekoci</description>
<pubDate>Tue, 02 Jul 2019 14:00:44 +0000</pubDate>
<dc:date>2019-07-02T14:00:44+00:00</dc:date>
</item>
<item>
<title>Ĉirkaŭ la mondon post okdek tagoj 26</title>
<link>http://eo1a.blogspot.com/2019/06/cirkau-la-mondon-post-okdek-tagoj-26.html</link>
<description>Esperanto Unua</description>
<pubDate>Sat, 29 Jun 2019 00:47:00 +0000</pubDate>
<dc:date>2019-06-29T00:47:00+00:00</dc:date>
</item>
<item>
<title>Maja bulteno</title>
<link>http://esperanto-chicago.org/maja-bulteno-2019/</link>
<description>La Esperanto-Societo de Ŝikago</description>
<pubDate>Fri, 14 Jun 2019 14:09:22 +0000</pubDate>
<dc:date>2019-06-14T14:09:22+00:00</dc:date>
</item>
<item>
<title>Aprila bulteno</title>
<link>http://esperanto-chicago.org/aprila-bulteno-2019/</link>
<description>La Esperanto-Societo de Ŝikago</description>
<pubDate>Fri, 14 Jun 2019 14:07:09 +0000</pubDate>
<dc:date>2019-06-14T14:07:09+00:00</dc:date>
</item>
<item>
<title>Marta bulteno</title>
<link>http://esperanto-chicago.org/marta-bulteno-2019/</link>
<description>La Esperanto-Societo de Ŝikago</description>
<pubDate>Fri, 14 Jun 2019 14:03:20 +0000</pubDate>
<dc:date>2019-06-14T14:03:20+00:00</dc:date>
</item>
<item>
<title>Februara bulteno</title>
<link>http://esperanto-chicago.org/februara-bulteno-2019/</link>
<description>La Esperanto-Societo de Ŝikago</description>
<pubDate>Fri, 14 Jun 2019 13:58:17 +0000</pubDate>
<dc:date>2019-06-14T13:58:17+00:00</dc:date>
</item>
<item>
<title>Propaganda Komitato / Propaganda Committee</title>
<link>https://esperantoslv.wordpress.com/2019/06/13/propaganda-komitato-propaganda-committee/</link>
<description>Esperanto SLV</description>
<pubDate>Thu, 13 Jun 2019 15:37:13 +0000</pubDate>
<dc:date>2019-06-13T15:37:13+00:00</dc:date>
</item>
<item>
<title>Ĉirkaŭ la mondon post okdek tagoj 25</title>
<link>http://eo1a.blogspot.com/2019/06/cirkau-la-mondon-post-okdek-tagoj-25.html</link>
<description>Esperanto Unua</description>
<pubDate>Sat, 08 Jun 2019 13:43:00 +0000</pubDate>
<dc:date>2019-06-08T13:43:00+00:00</dc:date>
</item>
<item>
<title>Semajna kunveno por 2019-06-04</title>
<link>http://www.ekoci.org/2019/06/04/semajna-kunveno-por-2019-06-04/</link>
<description>Ekoci</description>
<pubDate>Tue, 04 Jun 2019 14:47:07 +0000</pubDate>
<dc:date>2019-06-04T14:47:07+00:00</dc:date>
</item>
<item>
<title>Norda Karolino Printempa Esperanto-Renkontiĝo 2019</title>
<link>https://esperanto-nc.org/2019/06/02/norda-karolino-printempa-esperanto-renkontigo-2019/</link>
<description>Esperanto in North Carolina</description>
<pubDate>Sun, 02 Jun 2019 00:13:29 +0000</pubDate>
<dc:date>2019-06-02T00:13:29+00:00</dc:date>
</item>
<item>
<title>Semajna kunveno por 2019-05-28</title>
<link>http://www.ekoci.org/2019/05/27/semajna-kunveno-por-2019-05-28/</link>
<description>Ekoci</description>
<pubDate>Mon, 27 May 2019 20:31:56 +0000</pubDate>
<dc:date>2019-05-27T20:31:56+00:00</dc:date>
</item>
<item>
<title>Ĉirkaŭ la mondon post okdek tagoj 24</title>
<link>http://eo1a.blogspot.com/2019/05/cirkau-la-mondon-post-okdek-tagoj-24.html</link>
<description>Esperanto Unua</description>
<pubDate>Mon, 27 May 2019 13:11:00 +0000</pubDate>
<dc:date>2019-05-27T13:11:00+00:00</dc:date>
</item>
<item>
<title>Esther Schors TED Talk</title>
<link>https://esperanto-nc.org/2019/05/24/esther-schors-ted-talk/</link>
<description>Esperanto in North Carolina</description>
<pubDate>Fri, 24 May 2019 20:17:55 +0000</pubDate>
<dc:date>2019-05-24T20:17:55+00:00</dc:date>
</item>
<item>
<title>Ĉirkaŭ la mondon post okdek tagoj 23</title>
<link>http://eo1a.blogspot.com/2019/05/cirkau-la-mondon-post-okdek-tagoj-23.html</link>
<description>Esperanto Unua</description>
<pubDate>Sat, 11 May 2019 14:02:00 +0000</pubDate>
<dc:date>2019-05-11T14:02:00+00:00</dc:date>
</item>
<item>
<title>Semajna kunveno por 2019-05-07</title>
<link>http://www.ekoci.org/2019/05/07/semajna-kunveno-por-2019-05-07/</link>
<description>Ekoci</description>
<pubDate>Tue, 07 May 2019 13:57:55 +0000</pubDate>
<dc:date>2019-05-07T13:57:55+00:00</dc:date>
</item>
<item>
<title>Ni Desegnis Bestojn/We Drew Animals</title>
<link>https://esperantoslv.wordpress.com/2019/05/06/ni-desegnas-bestojn/</link>
<description>Esperanto SLV</description>
<pubDate>Mon, 06 May 2019 13:30:44 +0000</pubDate>
<dc:date>2019-05-06T13:30:44+00:00</dc:date>
</item>
<item>
<title>Ĉirkaŭ la mondon post okdek tagoj 22</title>
<link>http://eo1a.blogspot.com/2019/05/cirkau-la-mondon-post-okdek-tagoj-22.html</link>
<description>Esperanto Unua</description>
<pubDate>Sat, 04 May 2019 17:31:00 +0000</pubDate>
<dc:date>2019-05-04T17:31:00+00:00</dc:date>
</item>
<item>
<title>NULIGITA! Semajna kunveno por 2019-04-30</title>
<link>http://www.ekoci.org/2019/04/30/semajna-kunveno-por-2019-04-30/</link>
<description>Ekoci</description>
<pubDate>Tue, 30 Apr 2019 14:05:51 +0000</pubDate>
<dc:date>2019-04-30T14:05:51+00:00</dc:date>
</item>
<item>
<title>Ĉu vi Ĉeestos NASK?/Will you be at NASK?</title>
<link>https://esperantoslv.wordpress.com/2019/04/27/cu-vi-ceestos-nask-will-you-be-at-nask/</link>
<description>Esperanto SLV</description>
<pubDate>Sat, 27 Apr 2019 16:31:34 +0000</pubDate>
<dc:date>2019-04-27T16:31:34+00:00</dc:date>
</item>
<item>
<title>Bonvenon al nia Nova Retpaĝo/Welcome to Our New Webpage</title>
<link>https://esperantoslv.wordpress.com/2019/04/26/bonevenon-al-nia-nova-retpagxo-welcome-to-our-new-webpage/</link>
<description>Esperanto SLV</description>
<pubDate>Fri, 26 Apr 2019 00:41:16 +0000</pubDate>
<dc:date>2019-04-26T00:41:16+00:00</dc:date>
</item>
<item>
<title>Tomaso Alexander Does A Video Promoting the 2019 Spring Esperanto Gathering in North Carolina</title>
<link>https://esperanto-nc.org/2019/02/16/tomaso-alexander-does-a-video-promoting-the-spring-esperanto-gathering/</link>
<description>Esperanto in North Carolina</description>
<pubDate>Sat, 16 Feb 2019 20:42:05 +0000</pubDate>
<dc:date>2019-02-16T20:42:05+00:00</dc:date>
</item>
<dc:date>2019-07-09T00:00:39-07:00</dc:date>
</channel>
</rss>