Compare commits

...

4 Enmetoj

Author SHA1 Message Date
Jaidyn Ann 74acf3fa2d Replace our RELATIVE-PATHNAME with PATHNAME-UTIL’s 2024-05-31 14:44:33 -05:00
Jaidyn Ann e2f2f38f01 Support separate download-dir and URL-dir 2024-05-31 14:41:46 -05:00
Jaidyn Ann 771241cb1d Fix URLs not getting substituted after download
That is, on the first run of MIRROR-IMG, no
mirrored URLs got substituted — and on subsequent
runs, only previously-downloaded URLS did.
2024-05-31 12:11:31 -05:00
Jaidyn Ann 06890e2395 Ensure testing HTTP server dies, even post-error 2024-05-31 12:10:29 -05:00
3 changed files with 47 additions and 19 deletions

View File

@ -4,7 +4,7 @@
:version "0.1" :version "0.1"
:license "GPLv3" :license "GPLv3"
:author "Jaidyn Ann <jadedctrl@posteo.at>" :author "Jaidyn Ann <jadedctrl@posteo.at>"
:depends-on (:dexador :lquery :split-sequence) :depends-on (:dexador :lquery :pathname-utils :split-sequence)
:components ((:file "src/mirror-img")) :components ((:file "src/mirror-img"))
:in-order-to ((test-op (test-op "mirror-img/tests")) :in-order-to ((test-op (test-op "mirror-img/tests"))
(build-op (build-op "mirror-img/unix")))) (build-op (build-op "mirror-img/unix"))))

View File

@ -29,26 +29,32 @@
;;; Mirror-img ;;; Mirror-img
;;; ———————————————————————————————————————— ;;; ————————————————————————————————————————
;; pathname pathname list → string ;; pathname pathname list → string
(defun mirror-img (html-file base-dir &key (tags *default-tags*)) (defun mirror-img (html-file download-dir &key (url-dir download-dir) (tags *default-tags*))
"Attempt to mirror all remote HREF/SRC URLs of an HTML files tags, "Attempt to mirror all remote HREF/SRC URLs of an HTML files tags,
downloading them to base-dir. The remote URLs will be replaced with the downloading them to DOWNLOAD-DIR. For each remote URL that could be
local version, where downloading was successful. successfuly mirrored, the remote URLs will be replaced with the local
version. The URL used will will be BASE-DIR plus the filename, where
BASE-DIR defaults to DOWNLOAD-DIR.
Returns a string of the modified pages HTML." Returns a string of the modified pages HTML."
(let* ((dom (lquery:$ (lquery:initialize html-file))) (let* ((dom
(lquery:$ (lquery:initialize html-file)))
(urlpathname-alist (urlpathname-alist
(mirror-linked-urls dom :tags tags :base-dir base-dir)) (mirror-linked-urls dom :tags tags :download-dir download-dir))
(urlrelative-url-alist (urlrelative-url-alist
(mapcar (lambda (pair) (cons (car pair) (namestring (cdr pair)))) (mapcar
urlpathname-alist))) (lambda (pair)
(cons (car pair)
(mirror-pathname->mirror-url (cdr pair) download-dir url-dir)))
urlpathname-alist)))
(substitute-urls dom (substitute-urls dom
urlrelative-url-alist urlrelative-url-alist
:tags tags) :tags tags)
(aref (lquery:$ dom (serialize)) 0))) (aref (lquery:$ dom (serialize)) 0)))
;; lquery-dom list pathname → alist ;; lquery-dom list pathname → alist
(defun mirror-linked-urls (dom &key (tags *default-tags*) base-dir) (defun mirror-linked-urls (dom &key (tags *default-tags*) download-dir)
"Mirror all URLs in the HREF/SRC attributes of the given tags in an LQuery "Mirror all URLs in the HREF/SRC attributes of the given tags in an LQuery
DOM, downloading them to either the current-working directory or the base-dir. DOM, downloading them to either the current-working directory or the DOWNLOAD-DIR.
Returns an associative list containing pairs of remote-URLs to local pathnames. Returns an associative list containing pairs of remote-URLs to local pathnames.
Any unsuccessfuly-mirrored URL is excluded from the alist. Any unsuccessfuly-mirrored URL is excluded from the alist.
For example: For example:
@ -58,7 +64,7 @@ For example:
(mapcar (lambda (url) (mapcar (lambda (url)
(let ((download-path (let ((download-path
(ensure-directories-exist (ensure-directories-exist
(mirrored-pathname url :base-dir base-dir)))) (mirrored-pathname url :base-dir download-dir))))
(if (http-fetch url download-path) (if (http-fetch url download-path)
(cons url download-path)))) (cons url download-path))))
(linked-urls dom :tags tags)))) (linked-urls dom :tags tags))))
@ -96,6 +102,14 @@ to mirror it."
(quri:uri-host url) (quri:uri-host url)
(pathname-leaf path))))) (pathname-leaf path)))))
;; pathname pathname string → string
(defun mirror-pathname->mirror-url (pathname download-dir url-dir)
"Convert a mirrored file into a substitution-worthy URL, from its PATHNAME,
DOWNLOAD-DIR, and URL-DIR."
(format nil "~A~A"
url-dir
(pathname-utils:relative-pathname download-dir pathname)))
;;; DOM-parsing ;;; DOM-parsing
@ -139,8 +153,9 @@ already set)."
(defun http-fetch (url path) (defun http-fetch (url path)
"Download a URL to a path; if successful, returns the pathname. Otherwise, NIL." "Download a URL to a path; if successful, returns the pathname. Otherwise, NIL."
(handler-case (handler-case
(and (dexador:fetch (url-encode-uri url) path) ; DEXADOR:FETCH returns nil on success, errors on fail. Hence the OR.
(pathname path)) (or (dexador:fetch (url-encode-uri url) path)
(pathname path))
(cl-user::file-exists () (cl-user::file-exists ()
path) path)
(error nil))) (error nil)))

View File

@ -24,29 +24,41 @@
(defparameter *mirror-img-result* nil) (defparameter *mirror-img-result* nil)
(defun relative-pathname (path) (defun relative-pathname (path)
"Return an absolute path adding the relative PATH to the :mirror-img
systems path."
(asdf:system-relative-pathname :mirror-img/tests path)) (asdf:system-relative-pathname :mirror-img/tests path))
(defun run () (defun run ()
"Run all tests for the mirror-img package."
(let* ((doc-root (relative-pathname "t/testing-website/")) (let* ((doc-root (relative-pathname "t/testing-website/"))
(acceptor (make-instance 'hunchentoot:easy-acceptor (acceptor (make-instance 'hunchentoot:easy-acceptor
:port 4242 :port 4242
:document-root doc-root)) :document-root doc-root))
(server (hunchentoot:start acceptor))) (server (hunchentoot:start acceptor)))
(lisp-unit2:with-summary ()
(run-tests :package :mirror-img/tests)
(hunchentoot:stop server))))
(lisp-unit2:with-summary ()
(handler-case
(progn
(run-tests :package :mirror-img/tests)
(hunchentoot:stop server))
(error (c)
(hunchentoot:stop server)
c)))))
(define-test mirror-img (:tags '(mirror-img)) (define-test mirror-img (:tags '(mirror-img))
(assert-eq (assert-eq
'T 'T
(stringp (stringp
(setq *mirror-img-result* (setq *mirror-img-result*
(mirror-img:mirror-img (relative-pathname "t/testing-website/index.html") (let* ((html-path (relative-pathname "t/testing-website/index.html"))
"tmp"))))) (download-dir (relative-pathname "t/tmp/"))
(url-dir (pathname-utils:relative-pathname
(relative-pathname "t/")
download-dir)))
(mirror-img:mirror-img html-path download-dir :url-dir url-dir))))))
(define-test mirror-img.files-mirrored (:tags '(mirror-img)) (define-test mirror-img.files-mirrored (:tags '(mirror-img))
(assert-equal (assert-equal
'("café" "fireplace" "merry christmas!!! ^_^" "more_calming" "style" "welcoming you") '("café" "classy_fireplace" "fireplace" "merry christmas!!! ^_^" "more_calming" "style" "welcoming you")
(sort (mapcar #'pathname-name (sort (mapcar #'pathname-name
(uiop:directory-files (relative-pathname "t/tmp/localhost/"))) (uiop:directory-files (relative-pathname "t/tmp/localhost/")))
#'string-lessp))) #'string-lessp)))
@ -134,3 +146,4 @@
(assert-equal (assert-equal
" lak -!?ĉ_berries.png" " lak -!?ĉ_berries.png"
(mirror-img::pathname-leaf #p"/mom/dad?!?Ljla asdasd'' aslkdjĉ/apple/laksdjal/ lak -!?ĉ_berries.png"))) (mirror-img::pathname-leaf #p"/mom/dad?!?Ljla asdasd'' aslkdjĉ/apple/laksdjal/ lak -!?ĉ_berries.png")))