Support separate download-dir and URL-dir
This commit is contained in:
parent
771241cb1d
commit
e2f2f38f01
|
@ -29,16 +29,22 @@
|
||||||
;;; Mirror-img
|
;;; Mirror-img
|
||||||
;;; ————————————————————————————————————————
|
;;; ————————————————————————————————————————
|
||||||
;; pathname pathname list → string
|
;; pathname pathname list → string
|
||||||
(defun mirror-img (html-file base-dir &key (tags *default-tags*))
|
(defun mirror-img (html-file download-dir &key (url-dir download-dir) (tags *default-tags*))
|
||||||
"Attempt to mirror all remote HREF/SRC URLs of an HTML file’s tags,
|
"Attempt to mirror all remote HREF/SRC URLs of an HTML file’s tags,
|
||||||
downloading them to base-dir. The remote URLs will be replaced with the
|
downloading them to DOWNLOAD-DIR. For each remote URL that could be
|
||||||
local version, where downloading was successful.
|
successfuly mirrored, the remote URLs will be replaced with the local
|
||||||
|
version. The URL used will will be BASE-DIR plus the filename, where
|
||||||
|
BASE-DIR defaults to DOWNLOAD-DIR.
|
||||||
Returns a string of the modified page’s HTML."
|
Returns a string of the modified page’s HTML."
|
||||||
(let* ((dom (lquery:$ (lquery:initialize html-file)))
|
(let* ((dom
|
||||||
|
(lquery:$ (lquery:initialize html-file)))
|
||||||
(url→pathname-alist
|
(url→pathname-alist
|
||||||
(mirror-linked-urls dom :tags tags :base-dir base-dir))
|
(mirror-linked-urls dom :tags tags :download-dir download-dir))
|
||||||
(url→relative-url-alist
|
(url→relative-url-alist
|
||||||
(mapcar (lambda (pair) (cons (car pair) (namestring (cdr pair))))
|
(mapcar
|
||||||
|
(lambda (pair)
|
||||||
|
(cons (car pair)
|
||||||
|
(mirror-pathname->mirror-url (cdr pair) download-dir url-dir)))
|
||||||
url→pathname-alist)))
|
url→pathname-alist)))
|
||||||
(substitute-urls dom
|
(substitute-urls dom
|
||||||
url→relative-url-alist
|
url→relative-url-alist
|
||||||
|
@ -46,9 +52,9 @@ Returns a string of the modified page’s HTML."
|
||||||
(aref (lquery:$ dom (serialize)) 0)))
|
(aref (lquery:$ dom (serialize)) 0)))
|
||||||
|
|
||||||
;; lquery-dom list pathname → alist
|
;; lquery-dom list pathname → alist
|
||||||
(defun mirror-linked-urls (dom &key (tags *default-tags*) base-dir)
|
(defun mirror-linked-urls (dom &key (tags *default-tags*) download-dir)
|
||||||
"Mirror all URLs in the HREF/SRC attributes of the given tags in an LQuery
|
"Mirror all URLs in the HREF/SRC attributes of the given tags in an LQuery
|
||||||
DOM, downloading them to either the current-working directory or the base-dir.
|
DOM, downloading them to either the current-working directory or the DOWNLOAD-DIR.
|
||||||
Returns an associative list containing pairs of remote-URLs to local pathnames.
|
Returns an associative list containing pairs of remote-URLs to local pathnames.
|
||||||
Any unsuccessfuly-mirrored URL is excluded from the alist.
|
Any unsuccessfuly-mirrored URL is excluded from the alist.
|
||||||
For example:
|
For example:
|
||||||
|
@ -58,7 +64,7 @@ For example:
|
||||||
(mapcar (lambda (url)
|
(mapcar (lambda (url)
|
||||||
(let ((download-path
|
(let ((download-path
|
||||||
(ensure-directories-exist
|
(ensure-directories-exist
|
||||||
(mirrored-pathname url :base-dir base-dir))))
|
(mirrored-pathname url :base-dir download-dir))))
|
||||||
(if (http-fetch url download-path)
|
(if (http-fetch url download-path)
|
||||||
(cons url download-path))))
|
(cons url download-path))))
|
||||||
(linked-urls dom :tags tags))))
|
(linked-urls dom :tags tags))))
|
||||||
|
@ -96,6 +102,14 @@ to mirror it."
|
||||||
(quri:uri-host url)
|
(quri:uri-host url)
|
||||||
(pathname-leaf path)))))
|
(pathname-leaf path)))))
|
||||||
|
|
||||||
|
;; pathname pathname string → string
|
||||||
|
(defun mirror-pathname->mirror-url (pathname download-dir url-dir)
|
||||||
|
"Convert a mirrored file into a substitution-worthy URL, from its PATHNAME,
|
||||||
|
DOWNLOAD-DIR, and URL-DIR."
|
||||||
|
(format nil "~A~A"
|
||||||
|
url-dir
|
||||||
|
(relative-pathname download-dir pathname)))
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
;;; DOM-parsing
|
;;; DOM-parsing
|
||||||
|
@ -190,3 +204,58 @@ That is, 443 is implied by HTTPS, so nil is returned; but 998 wouldn’t be impl
|
||||||
"Given a pathname, return the entirety of the file leaf.
|
"Given a pathname, return the entirety of the file leaf.
|
||||||
That is, everything following the last directory name."
|
That is, everything following the last directory name."
|
||||||
(car (last (split-sequence:split-sequence #\/ (namestring pathname)))))
|
(car (last (split-sequence:split-sequence #\/ (namestring pathname)))))
|
||||||
|
|
||||||
|
;; pathname pathname → pathname
|
||||||
|
(defun relative-pathname (from pathname)
|
||||||
|
"Create a relative pathname leading from the pathname FROM to PATHNAME.
|
||||||
|
By default, FROM is the current working directory."
|
||||||
|
(let* ((dir-str
|
||||||
|
(reduce (lambda (a b)
|
||||||
|
(format nil "~A/~A" a b))
|
||||||
|
(cdr (relative-pathname-directory pathname from))))
|
||||||
|
(dir-pathname
|
||||||
|
(pathname (format nil "~A/" dir-str))))
|
||||||
|
(merge-pathnames dir-pathname
|
||||||
|
(pathname (pathname-leaf pathname)))))
|
||||||
|
|
||||||
|
;; pathname pathname → list
|
||||||
|
(defun relative-pathname-directory (pathname &optional (from (uiop/os:getcwd)))
|
||||||
|
"Given a PATHNAME, create a relative PATHNAME-DIRECTORY-style list from the
|
||||||
|
perspective of the FROM pathname (the current working directory, by default).
|
||||||
|
Returns something along the lines of '(:RELATIVE “..” “DAD”)"
|
||||||
|
(let* ((from (cdr (pathname-directory from)))
|
||||||
|
(to (cdr (pathname-directory pathname)))
|
||||||
|
(from+to (list-lengths nil from to))
|
||||||
|
(to-append (list)))
|
||||||
|
(append
|
||||||
|
(list :relative)
|
||||||
|
(remove-if
|
||||||
|
#'not
|
||||||
|
(loop for from-dir in (car from+to)
|
||||||
|
for to-dir in (cadr from+to)
|
||||||
|
collect
|
||||||
|
(cond ((equal from-dir to-dir)
|
||||||
|
nil)
|
||||||
|
((not from-dir)
|
||||||
|
to-dir)
|
||||||
|
(to-dir
|
||||||
|
(setq to-append (append to-append (list to-dir)))
|
||||||
|
"..")
|
||||||
|
('T
|
||||||
|
".."))))
|
||||||
|
to-append)))
|
||||||
|
|
||||||
|
;; object list … list → list(-of-lists)
|
||||||
|
(defun list-lengths (padding &rest lists)
|
||||||
|
"Equalize the lengths of LISTS, by padding the tail-end of smaller lists with
|
||||||
|
the PADDING object."
|
||||||
|
(let ((target-length
|
||||||
|
(apply #'max (mapcar #'length lists))))
|
||||||
|
(mapcar
|
||||||
|
(lambda (lst)
|
||||||
|
(append
|
||||||
|
lst
|
||||||
|
(loop for i
|
||||||
|
upto (- target-length (length lst) 1)
|
||||||
|
collect padding)))
|
||||||
|
lists)))
|
||||||
|
|
|
@ -24,6 +24,8 @@
|
||||||
(defparameter *mirror-img-result* nil)
|
(defparameter *mirror-img-result* nil)
|
||||||
|
|
||||||
(defun relative-pathname (path)
|
(defun relative-pathname (path)
|
||||||
|
"Return an absolute path adding the relative PATH to the :mirror-img
|
||||||
|
system’s path."
|
||||||
(asdf:system-relative-pathname :mirror-img/tests path))
|
(asdf:system-relative-pathname :mirror-img/tests path))
|
||||||
|
|
||||||
(defun run ()
|
(defun run ()
|
||||||
|
@ -47,12 +49,16 @@
|
||||||
'T
|
'T
|
||||||
(stringp
|
(stringp
|
||||||
(setq *mirror-img-result*
|
(setq *mirror-img-result*
|
||||||
(mirror-img:mirror-img (relative-pathname "t/testing-website/index.html")
|
(let* ((html-path (relative-pathname "t/testing-website/index.html"))
|
||||||
"tmp")))))
|
(download-dir (relative-pathname "t/tmp/"))
|
||||||
|
(url-dir (pathname-utils:relative-pathname
|
||||||
|
(relative-pathname "t/")
|
||||||
|
download-dir)))
|
||||||
|
(mirror-img:mirror-img html-path download-dir :url-dir url-dir))))))
|
||||||
|
|
||||||
(define-test mirror-img.files-mirrored (:tags '(mirror-img))
|
(define-test mirror-img.files-mirrored (:tags '(mirror-img))
|
||||||
(assert-equal
|
(assert-equal
|
||||||
'("café" "fireplace" "merry christmas!!! ^_^" "more_calming" "style" "welcoming you")
|
'("café" "classy_fireplace" "fireplace" "merry christmas!!! ^_^" "more_calming" "style" "welcoming you")
|
||||||
(sort (mapcar #'pathname-name
|
(sort (mapcar #'pathname-name
|
||||||
(uiop:directory-files (relative-pathname "t/tmp/localhost/")))
|
(uiop:directory-files (relative-pathname "t/tmp/localhost/")))
|
||||||
#'string-lessp)))
|
#'string-lessp)))
|
||||||
|
@ -140,3 +146,4 @@
|
||||||
(assert-equal
|
(assert-equal
|
||||||
" lak -!?ĉ_berries.png"
|
" lak -!?ĉ_berries.png"
|
||||||
(mirror-img::pathname-leaf #p"/mom/dad?!?Ljla asdasd'' aslkdjĉ/apple/laksdjal/ lak -!?ĉ_berries.png")))
|
(mirror-img::pathname-leaf #p"/mom/dad?!?Ljla asdasd'' aslkdjĉ/apple/laksdjal/ lak -!?ĉ_berries.png")))
|
||||||
|
|
||||||
|
|
Ŝarĝante…
Reference in New Issue