diff --git a/src/mirror-img.lisp b/src/mirror-img.lisp index 2d1094b..f4d39ad 100644 --- a/src/mirror-img.lisp +++ b/src/mirror-img.lisp @@ -29,26 +29,32 @@ ;;; Mirror-img ;;; ———————————————————————————————————————— ;; pathname pathname list → string -(defun mirror-img (html-file base-dir &key (tags *default-tags*)) +(defun mirror-img (html-file download-dir &key (url-dir download-dir) (tags *default-tags*)) "Attempt to mirror all remote HREF/SRC URLs of an HTML file’s tags, -downloading them to base-dir. The remote URLs will be replaced with the -local version, where downloading was successful. +downloading them to DOWNLOAD-DIR. For each remote URL that could be +successfuly mirrored, the remote URLs will be replaced with the local +version. The URL used will will be BASE-DIR plus the filename, where +BASE-DIR defaults to DOWNLOAD-DIR. Returns a string of the modified page’s HTML." - (let* ((dom (lquery:$ (lquery:initialize html-file))) + (let* ((dom + (lquery:$ (lquery:initialize html-file))) (url→pathname-alist - (mirror-linked-urls dom :tags tags :base-dir base-dir)) + (mirror-linked-urls dom :tags tags :download-dir download-dir)) (url→relative-url-alist - (mapcar (lambda (pair) (cons (car pair) (namestring (cdr pair)))) - url→pathname-alist))) + (mapcar + (lambda (pair) + (cons (car pair) + (mirror-pathname->mirror-url (cdr pair) download-dir url-dir))) + url→pathname-alist))) (substitute-urls dom url→relative-url-alist :tags tags) (aref (lquery:$ dom (serialize)) 0))) ;; lquery-dom list pathname → alist -(defun mirror-linked-urls (dom &key (tags *default-tags*) base-dir) +(defun mirror-linked-urls (dom &key (tags *default-tags*) download-dir) "Mirror all URLs in the HREF/SRC attributes of the given tags in an LQuery -DOM, downloading them to either the current-working directory or the base-dir. +DOM, downloading them to either the current-working directory or the DOWNLOAD-DIR. Returns an associative list containing pairs of remote-URLs to local pathnames. Any unsuccessfuly-mirrored URL is excluded from the alist. For example: @@ -58,7 +64,7 @@ For example: (mapcar (lambda (url) (let ((download-path (ensure-directories-exist - (mirrored-pathname url :base-dir base-dir)))) + (mirrored-pathname url :base-dir download-dir)))) (if (http-fetch url download-path) (cons url download-path)))) (linked-urls dom :tags tags)))) @@ -96,6 +102,14 @@ to mirror it." (quri:uri-host url) (pathname-leaf path))))) +;; pathname pathname string → string +(defun mirror-pathname->mirror-url (pathname download-dir url-dir) + "Convert a mirrored file into a substitution-worthy URL, from its PATHNAME, +DOWNLOAD-DIR, and URL-DIR." + (format nil "~A~A" + url-dir + (relative-pathname download-dir pathname))) + ;;; DOM-parsing @@ -190,3 +204,58 @@ That is, 443 is implied by HTTPS, so nil is returned; but 998 wouldn’t be impl "Given a pathname, return the entirety of the file leaf. That is, everything following the last directory name." (car (last (split-sequence:split-sequence #\/ (namestring pathname))))) + +;; pathname pathname → pathname +(defun relative-pathname (from pathname) + "Create a relative pathname leading from the pathname FROM to PATHNAME. +By default, FROM is the current working directory." + (let* ((dir-str + (reduce (lambda (a b) + (format nil "~A/~A" a b)) + (cdr (relative-pathname-directory pathname from)))) + (dir-pathname + (pathname (format nil "~A/" dir-str)))) + (merge-pathnames dir-pathname + (pathname (pathname-leaf pathname))))) + +;; pathname pathname → list +(defun relative-pathname-directory (pathname &optional (from (uiop/os:getcwd))) + "Given a PATHNAME, create a relative PATHNAME-DIRECTORY-style list from the +perspective of the FROM pathname (the current working directory, by default). +Returns something along the lines of '(:RELATIVE “..” “DAD”)" + (let* ((from (cdr (pathname-directory from))) + (to (cdr (pathname-directory pathname))) + (from+to (list-lengths nil from to)) + (to-append (list))) + (append + (list :relative) + (remove-if + #'not + (loop for from-dir in (car from+to) + for to-dir in (cadr from+to) + collect + (cond ((equal from-dir to-dir) + nil) + ((not from-dir) + to-dir) + (to-dir + (setq to-append (append to-append (list to-dir))) + "..") + ('T + "..")))) + to-append))) + +;; object list … list → list(-of-lists) +(defun list-lengths (padding &rest lists) + "Equalize the lengths of LISTS, by padding the tail-end of smaller lists with +the PADDING object." + (let ((target-length + (apply #'max (mapcar #'length lists)))) + (mapcar + (lambda (lst) + (append + lst + (loop for i + upto (- target-length (length lst) 1) + collect padding))) + lists))) diff --git a/t/mirror-img.lisp b/t/mirror-img.lisp index d3548cd..08e9c05 100644 --- a/t/mirror-img.lisp +++ b/t/mirror-img.lisp @@ -24,6 +24,8 @@ (defparameter *mirror-img-result* nil) (defun relative-pathname (path) + "Return an absolute path adding the relative PATH to the :mirror-img +system’s path." (asdf:system-relative-pathname :mirror-img/tests path)) (defun run () @@ -47,12 +49,16 @@ 'T (stringp (setq *mirror-img-result* - (mirror-img:mirror-img (relative-pathname "t/testing-website/index.html") - "tmp"))))) + (let* ((html-path (relative-pathname "t/testing-website/index.html")) + (download-dir (relative-pathname "t/tmp/")) + (url-dir (pathname-utils:relative-pathname + (relative-pathname "t/") + download-dir))) + (mirror-img:mirror-img html-path download-dir :url-dir url-dir)))))) (define-test mirror-img.files-mirrored (:tags '(mirror-img)) (assert-equal - '("café" "fireplace" "merry christmas!!! ^_^" "more_calming" "style" "welcoming you") + '("café" "classy_fireplace" "fireplace" "merry christmas!!! ^_^" "more_calming" "style" "welcoming you") (sort (mapcar #'pathname-name (uiop:directory-files (relative-pathname "t/tmp/localhost/"))) #'string-lessp))) @@ -140,3 +146,4 @@ (assert-equal " lak -!?ĉ_berries.png" (mirror-img::pathname-leaf #p"/mom/dad?!?Ljla asdasd'' aslkdjĉ/apple/laksdjal/ lak -!?ĉ_berries.png"))) +