Archived
1
0
Disbranĉigi 0
Ĉi tiu deponejo arĥiviĝis je 2024-01-29. Vi povas vidi kaj elŝuti dosierojn, sed ne povas puŝi nek raporti problemojn nek tirpeti.
rsss/rsss.lisp
2019-07-10 12:23:28 -05:00

261 lines
9.1 KiB
Common Lisp

;; This program is free software: you can redistribute it and/or modify it
;; under the terms of the GNU General Public License as published by the
;; Free Software Foundation, either version 3 of the License, or (at your
;; option) any later version.
;; This program is distributed in the hope that it will be useful, but WITHOUT
;; ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
;; FITNESS FOR A PARTICULAR PURPOSE.
;; See the GNU General Public License for more details.
;; -----------------
(defpackage :rsss
(:use :cl)
(:export
:parse
:entries :name :uri :date :desc :author :text :media))
(in-package :rsss)
;; —————————————————————————————————————
;; CLASSES
;; —————————————————————————————————————
(defclass feed ()
((uri :initarg :uri :accessor uri :initform nil)
(name :initarg :name :accessor name :initform nil)
(date :initarg :date :accessor date :initform nil)
(desc :initarg :desc :accessor desc :initform nil)
(entries :initarg :entries :accessor entries :initform nil)))
(defclass entry ()
((uri :initarg :uri :accessor uri :initform nil)
(name :initarg :name :accessor name :initform nil)
(date :initarg :date :accessor date :initform nil)
(author :initarg :author :accessor author :initform nil)
(desc :initarg :desc :accessor desc :initform nil)
(media :initarg :media :accessor media :initform nil)
(text :initarg :text :accessor text :initform nil)))
;; —————————————————————————————————————
;; MISC
;; —————————————————————————————————————
(defmacro append-or-replace (list item)
"If a list is empty (nil), then replace it with a new list containing item.
Otherwise, append item to the pre-existing list.
Side-effectively, with nconc et. al."
`(if (nilp ,list)
(setf ,list (list ,item))
(nconc ,list (list ,item))))
(defmacro mapnil (function list)
"Map over a list with a function, but remove all NILs from the result list."
`(remove nil (mapcar ,function ,list)))
(defmacro mapfirst (function list)
"Map over a list with a function, and return the first non-NIL result."
`(car (mapnil ,function ,list)))
;; VARYING LIST → LIST
(defun str-assoc (item list)
"Run #'assoc, but with #'string-equal as the test function."
(assoc item list :test #'string-equal))
;; VARYING → BOOLEAN
(defun nilp (item)
"Return whether or note an item is eq to NIL."
(eq nil item))
;; LIST → VARYING
(defun ie-car (item)
"Try car'ing something… but don't sweat it if, y'know, it fucks."
(ignore-errors (car item)))
;; —————————————————————————————————————
;; PARSING
;; —————————————————————————————————————
;; STRING → RSSS:FEED
(defun parse (xml)
"Parse a given XML string (atom/rss[12]) into a FEED object."
(let* ((node (xmls:parse xml))
(type (feed-type node)))
(cond ((or (eq type :rss2) (eq type :rss1))
(parse-rss node))
((eq type :atom)
(parse-atom node)))))
;; -----------------
(defmacro common-let (node extra-let form &optional extra-form)
"A let-statement used by basically every parsing-function/macro."
`(let ((name (xmls:node-name ,node))
(chchild (xmls:node-children ,node))
(attrs (xmls:node-attrs ,node))
,@extra-let)
,form))
;; —————————————————
;; ATOM PARSING
;; —————————————————
(defmacro parse-atom-children (rsss parent-node child-node
&optional (cond-1 '(T nil))
(cond-2 '(T nil)))
"Code common to parsing both overarching Atom XML and individual entries."
`(mapcar
(lambda (,child-node)
(common-let ,child-node nil
(cond ((string-equal "link" name)
(setf (uri ,rsss) (cadr (str-assoc "href" attrs))))
((string-equal "title" name)
(setf (name ,rsss) (car chchild)))
((string-equal "updated" name)
(setf (date ,rsss) (car chchild)))
((string-equal "summary" name)
(setf (desc ,rsss) (car chchild)))
,cond-1 ,cond-2)))
;; nil))
(xmls:node-children ,parent-node)))
;; -----------------
;; XMLS:NODE → RSSS:FEED
(defun parse-atom (atom-node)
"Parse Atom XMLS node into an rsss FEED object."
(let ((feed (make-instance 'feed)))
(parse-atom-children
feed atom-node atom-child
((string-equal "entry" name)
(append-or-replace (entries feed) (parse-atom-entry atom-child))))
feed))
;; XMLS:NODE → RSSS:ENTRY
(defun parse-atom-entry (entry-node)
"Parse an Atom <entry>'s XMLS:NODE into an RSSS:ENTRY object."
(let ((entry (make-instance 'entry)))
(parse-atom-children
entry entry-node entry-child
((string-equal "content" name)
(setf (text entry) (car chchild)))
((string-equal "author" name)
(setf (author entry)
(parse-atom-author-name entry-child))))
entry))
;; -----------------
;; XMLS:NODE → STRING
(defun parse-atom-author-name (author-node)
"Return the proper name of an author, given an Atom <author> node."
(common-let author-node nil
(if (stringp chchild) chchild
(mapfirst
(lambda (chchchild)
(if (string-equal "name" (xmls:node-name chchchild))
(car (xmls:node-children chchchild))))
chchild))))
;; —————————————————
;; RSS1/RSS2 PARSING
;; —————————————————
(defmacro parse-rss-children (rsss parent-node child-node
&optional (cond-1 '(T nil))
(cond-2 '(T nil))
(cond-3 '(T nil))
(cond-4 '(T nil)))
"Some code common to parsing the children of rss nodes."
`(mapcar
(lambda (,child-node)
(common-let ,child-node nil
(cond ((string-equal "title" name)
(setf (name ,rsss) (ie-car chchild)))
((string-equal "pubDate" name)
(setf (date ,rsss) (ie-car chchild)))
((string-equal "date" name)
(setf (date ,rsss) (ie-car chchild)))
((string-equal "link" name)
(setf (uri ,rsss) (ie-car chchild)))
,cond-1 ,cond-2 ,cond-3 ,cond-4)))
(xmls:node-children ,parent-node)))
;; -----------------
;; XMLS:NODE → RSSS:FEED
(defun parse-rss (rss-node)
"Parse an RSS XMLS node into an rsss:FEED object."
(let ((feed (make-instance 'feed)))
(mapcar
(lambda (rss-child)
(let ((name (xmls:node-name rss-child)))
(cond ((string-equal "channel" name)
(parse-rss-channel feed rss-child))
((string-equal "item" name)
(append-or-replace
(entries feed) (parse-rss-item rss-child))))))
(xmls:node-children rss-node))
feed))
;; RSSS:FEED XMLS:NODE → NIL
(defun parse-rss-channel (feed channel-node)
"Parse a channel node of an RSS feed; modifies the FEED object."
(parse-rss-children
feed channel-node channel-child
((string-equal "description" name)
(setf (desc feed) (ie-car chchild)))
((string-equal "item" name)
(append-or-replace (entries feed) (parse-rss-item channel-child))))
feed)
;; XMLS:NODE → RSSS:ENTRY
(defun parse-rss-item (entry-node)
"Parse an item (XMLS:NODE) of an RSS feed."
(let ((entry (make-instance 'entry)))
(parse-rss-children
entry entry-node entry-child
((or (string-equal "content" name) (string-equal "encoded" name))
(setf (text entry) (ie-car chchild)))
;; about the following: people use <description> tags for both summaries
;; and for actual post-bodies. (wtf :/)
;; so, if the text is longer than 250 characters, it's *probably* not
;; a summary, but an actual post. then again, not all posts are *that*
;; long…
;; this is a hack that won't always be helpful or effective, it's the
;; best trade-off I could think of. sorry ♥
((string-equal "description" name)
(if (and (< 250 (length (ie-car chchild))) (not (text entry)))
(setf (text entry) (ie-car chchild))
(setf (desc entry) (ie-car chchild))))
((string-equal "enclosure" name)
(setf (media entry) (cadr (str-assoc "url" attrs))))
((or (string-equal "author" name) (string-equal "creator" name))
(setf (author entry) (ie-car chchild))))
entry))
;; —————————————————————————————————————
;; PRE-PARSING
;; —————————————————————————————————————
;; STRING → SYMBOL
(defun feed-type (node)
"Return the type of the feed-- :rss2 for RSS 2.0, :rss1 for RSS 1.0/other,
and :atom for (obviously!) Atom."
(let ((name (xmls:node-name node))
(attrs (xmls:node-attrs node)))
(cond ((and (string-equal "rss" name)
(equal "2.0" (cadr (assoc "version" attrs :test #'equal))))
:rss2)
((or (string-equal "rss" name) (string-equal "rdf" name))
:rss1)
((string-equal "feed" name)
:atom))))