Support separate download-dir and URL-dir

This commit is contained in:
Jaidyn Ann 2024-05-31 14:41:46 -05:00
parent 771241cb1d
commit e2f2f38f01
2 changed files with 89 additions and 13 deletions

View File

@ -29,16 +29,22 @@
;;; Mirror-img ;;; Mirror-img
;;; ———————————————————————————————————————— ;;; ————————————————————————————————————————
;; pathname pathname list → string ;; pathname pathname list → string
(defun mirror-img (html-file base-dir &key (tags *default-tags*)) (defun mirror-img (html-file download-dir &key (url-dir download-dir) (tags *default-tags*))
"Attempt to mirror all remote HREF/SRC URLs of an HTML files tags, "Attempt to mirror all remote HREF/SRC URLs of an HTML files tags,
downloading them to base-dir. The remote URLs will be replaced with the downloading them to DOWNLOAD-DIR. For each remote URL that could be
local version, where downloading was successful. successfuly mirrored, the remote URLs will be replaced with the local
version. The URL used will will be BASE-DIR plus the filename, where
BASE-DIR defaults to DOWNLOAD-DIR.
Returns a string of the modified pages HTML." Returns a string of the modified pages HTML."
(let* ((dom (lquery:$ (lquery:initialize html-file))) (let* ((dom
(lquery:$ (lquery:initialize html-file)))
(urlpathname-alist (urlpathname-alist
(mirror-linked-urls dom :tags tags :base-dir base-dir)) (mirror-linked-urls dom :tags tags :download-dir download-dir))
(urlrelative-url-alist (urlrelative-url-alist
(mapcar (lambda (pair) (cons (car pair) (namestring (cdr pair)))) (mapcar
(lambda (pair)
(cons (car pair)
(mirror-pathname->mirror-url (cdr pair) download-dir url-dir)))
urlpathname-alist))) urlpathname-alist)))
(substitute-urls dom (substitute-urls dom
urlrelative-url-alist urlrelative-url-alist
@ -46,9 +52,9 @@ Returns a string of the modified pages HTML."
(aref (lquery:$ dom (serialize)) 0))) (aref (lquery:$ dom (serialize)) 0)))
;; lquery-dom list pathname → alist ;; lquery-dom list pathname → alist
(defun mirror-linked-urls (dom &key (tags *default-tags*) base-dir) (defun mirror-linked-urls (dom &key (tags *default-tags*) download-dir)
"Mirror all URLs in the HREF/SRC attributes of the given tags in an LQuery "Mirror all URLs in the HREF/SRC attributes of the given tags in an LQuery
DOM, downloading them to either the current-working directory or the base-dir. DOM, downloading them to either the current-working directory or the DOWNLOAD-DIR.
Returns an associative list containing pairs of remote-URLs to local pathnames. Returns an associative list containing pairs of remote-URLs to local pathnames.
Any unsuccessfuly-mirrored URL is excluded from the alist. Any unsuccessfuly-mirrored URL is excluded from the alist.
For example: For example:
@ -58,7 +64,7 @@ For example:
(mapcar (lambda (url) (mapcar (lambda (url)
(let ((download-path (let ((download-path
(ensure-directories-exist (ensure-directories-exist
(mirrored-pathname url :base-dir base-dir)))) (mirrored-pathname url :base-dir download-dir))))
(if (http-fetch url download-path) (if (http-fetch url download-path)
(cons url download-path)))) (cons url download-path))))
(linked-urls dom :tags tags)))) (linked-urls dom :tags tags))))
@ -96,6 +102,14 @@ to mirror it."
(quri:uri-host url) (quri:uri-host url)
(pathname-leaf path))))) (pathname-leaf path)))))
;; pathname pathname string → string
(defun mirror-pathname->mirror-url (pathname download-dir url-dir)
"Convert a mirrored file into a substitution-worthy URL, from its PATHNAME,
DOWNLOAD-DIR, and URL-DIR."
(format nil "~A~A"
url-dir
(relative-pathname download-dir pathname)))
;;; DOM-parsing ;;; DOM-parsing
@ -190,3 +204,58 @@ That is, 443 is implied by HTTPS, so nil is returned; but 998 wouldnt be impl
"Given a pathname, return the entirety of the file leaf. "Given a pathname, return the entirety of the file leaf.
That is, everything following the last directory name." That is, everything following the last directory name."
(car (last (split-sequence:split-sequence #\/ (namestring pathname))))) (car (last (split-sequence:split-sequence #\/ (namestring pathname)))))
;; pathname pathname → pathname
(defun relative-pathname (from pathname)
"Create a relative pathname leading from the pathname FROM to PATHNAME.
By default, FROM is the current working directory."
(let* ((dir-str
(reduce (lambda (a b)
(format nil "~A/~A" a b))
(cdr (relative-pathname-directory pathname from))))
(dir-pathname
(pathname (format nil "~A/" dir-str))))
(merge-pathnames dir-pathname
(pathname (pathname-leaf pathname)))))
;; pathname pathname → list
(defun relative-pathname-directory (pathname &optional (from (uiop/os:getcwd)))
"Given a PATHNAME, create a relative PATHNAME-DIRECTORY-style list from the
perspective of the FROM pathname (the current working directory, by default).
Returns something along the lines of '(:RELATIVE .. DAD)"
(let* ((from (cdr (pathname-directory from)))
(to (cdr (pathname-directory pathname)))
(from+to (list-lengths nil from to))
(to-append (list)))
(append
(list :relative)
(remove-if
#'not
(loop for from-dir in (car from+to)
for to-dir in (cadr from+to)
collect
(cond ((equal from-dir to-dir)
nil)
((not from-dir)
to-dir)
(to-dir
(setq to-append (append to-append (list to-dir)))
"..")
('T
".."))))
to-append)))
;; object list … list → list(-of-lists)
(defun list-lengths (padding &rest lists)
"Equalize the lengths of LISTS, by padding the tail-end of smaller lists with
the PADDING object."
(let ((target-length
(apply #'max (mapcar #'length lists))))
(mapcar
(lambda (lst)
(append
lst
(loop for i
upto (- target-length (length lst) 1)
collect padding)))
lists)))

View File

@ -24,6 +24,8 @@
(defparameter *mirror-img-result* nil) (defparameter *mirror-img-result* nil)
(defun relative-pathname (path) (defun relative-pathname (path)
"Return an absolute path adding the relative PATH to the :mirror-img
systems path."
(asdf:system-relative-pathname :mirror-img/tests path)) (asdf:system-relative-pathname :mirror-img/tests path))
(defun run () (defun run ()
@ -47,12 +49,16 @@
'T 'T
(stringp (stringp
(setq *mirror-img-result* (setq *mirror-img-result*
(mirror-img:mirror-img (relative-pathname "t/testing-website/index.html") (let* ((html-path (relative-pathname "t/testing-website/index.html"))
"tmp"))))) (download-dir (relative-pathname "t/tmp/"))
(url-dir (pathname-utils:relative-pathname
(relative-pathname "t/")
download-dir)))
(mirror-img:mirror-img html-path download-dir :url-dir url-dir))))))
(define-test mirror-img.files-mirrored (:tags '(mirror-img)) (define-test mirror-img.files-mirrored (:tags '(mirror-img))
(assert-equal (assert-equal
'("café" "fireplace" "merry christmas!!! ^_^" "more_calming" "style" "welcoming you") '("café" "classy_fireplace" "fireplace" "merry christmas!!! ^_^" "more_calming" "style" "welcoming you")
(sort (mapcar #'pathname-name (sort (mapcar #'pathname-name
(uiop:directory-files (relative-pathname "t/tmp/localhost/"))) (uiop:directory-files (relative-pathname "t/tmp/localhost/")))
#'string-lessp))) #'string-lessp)))
@ -140,3 +146,4 @@
(assert-equal (assert-equal
" lak -!?ĉ_berries.png" " lak -!?ĉ_berries.png"
(mirror-img::pathname-leaf #p"/mom/dad?!?Ljla asdasd'' aslkdjĉ/apple/laksdjal/ lak -!?ĉ_berries.png"))) (mirror-img::pathname-leaf #p"/mom/dad?!?Ljla asdasd'' aslkdjĉ/apple/laksdjal/ lak -!?ĉ_berries.png")))