Compare commits

..

No commits in common. "74acf3fa2dc1177faeec4cbe49990f162023a013" and "7807afece243854e928c857587446ba3d44555cc" have entirely different histories.

3 changed files with 19 additions and 47 deletions

View File

@ -4,7 +4,7 @@
:version "0.1" :version "0.1"
:license "GPLv3" :license "GPLv3"
:author "Jaidyn Ann <jadedctrl@posteo.at>" :author "Jaidyn Ann <jadedctrl@posteo.at>"
:depends-on (:dexador :lquery :pathname-utils :split-sequence) :depends-on (:dexador :lquery :split-sequence)
:components ((:file "src/mirror-img")) :components ((:file "src/mirror-img"))
:in-order-to ((test-op (test-op "mirror-img/tests")) :in-order-to ((test-op (test-op "mirror-img/tests"))
(build-op (build-op "mirror-img/unix")))) (build-op (build-op "mirror-img/unix"))))

View File

@ -29,32 +29,26 @@
;;; Mirror-img ;;; Mirror-img
;;; ———————————————————————————————————————— ;;; ————————————————————————————————————————
;; pathname pathname list → string ;; pathname pathname list → string
(defun mirror-img (html-file download-dir &key (url-dir download-dir) (tags *default-tags*)) (defun mirror-img (html-file base-dir &key (tags *default-tags*))
"Attempt to mirror all remote HREF/SRC URLs of an HTML files tags, "Attempt to mirror all remote HREF/SRC URLs of an HTML files tags,
downloading them to DOWNLOAD-DIR. For each remote URL that could be downloading them to base-dir. The remote URLs will be replaced with the
successfuly mirrored, the remote URLs will be replaced with the local local version, where downloading was successful.
version. The URL used will will be BASE-DIR plus the filename, where
BASE-DIR defaults to DOWNLOAD-DIR.
Returns a string of the modified pages HTML." Returns a string of the modified pages HTML."
(let* ((dom (let* ((dom (lquery:$ (lquery:initialize html-file)))
(lquery:$ (lquery:initialize html-file)))
(urlpathname-alist (urlpathname-alist
(mirror-linked-urls dom :tags tags :download-dir download-dir)) (mirror-linked-urls dom :tags tags :base-dir base-dir))
(urlrelative-url-alist (urlrelative-url-alist
(mapcar (mapcar (lambda (pair) (cons (car pair) (namestring (cdr pair))))
(lambda (pair) urlpathname-alist)))
(cons (car pair)
(mirror-pathname->mirror-url (cdr pair) download-dir url-dir)))
urlpathname-alist)))
(substitute-urls dom (substitute-urls dom
urlrelative-url-alist urlrelative-url-alist
:tags tags) :tags tags)
(aref (lquery:$ dom (serialize)) 0))) (aref (lquery:$ dom (serialize)) 0)))
;; lquery-dom list pathname → alist ;; lquery-dom list pathname → alist
(defun mirror-linked-urls (dom &key (tags *default-tags*) download-dir) (defun mirror-linked-urls (dom &key (tags *default-tags*) base-dir)
"Mirror all URLs in the HREF/SRC attributes of the given tags in an LQuery "Mirror all URLs in the HREF/SRC attributes of the given tags in an LQuery
DOM, downloading them to either the current-working directory or the DOWNLOAD-DIR. DOM, downloading them to either the current-working directory or the base-dir.
Returns an associative list containing pairs of remote-URLs to local pathnames. Returns an associative list containing pairs of remote-URLs to local pathnames.
Any unsuccessfuly-mirrored URL is excluded from the alist. Any unsuccessfuly-mirrored URL is excluded from the alist.
For example: For example:
@ -64,7 +58,7 @@ For example:
(mapcar (lambda (url) (mapcar (lambda (url)
(let ((download-path (let ((download-path
(ensure-directories-exist (ensure-directories-exist
(mirrored-pathname url :base-dir download-dir)))) (mirrored-pathname url :base-dir base-dir))))
(if (http-fetch url download-path) (if (http-fetch url download-path)
(cons url download-path)))) (cons url download-path))))
(linked-urls dom :tags tags)))) (linked-urls dom :tags tags))))
@ -102,14 +96,6 @@ to mirror it."
(quri:uri-host url) (quri:uri-host url)
(pathname-leaf path))))) (pathname-leaf path)))))
;; pathname pathname string → string
(defun mirror-pathname->mirror-url (pathname download-dir url-dir)
"Convert a mirrored file into a substitution-worthy URL, from its PATHNAME,
DOWNLOAD-DIR, and URL-DIR."
(format nil "~A~A"
url-dir
(pathname-utils:relative-pathname download-dir pathname)))
;;; DOM-parsing ;;; DOM-parsing
@ -153,9 +139,8 @@ already set)."
(defun http-fetch (url path) (defun http-fetch (url path)
"Download a URL to a path; if successful, returns the pathname. Otherwise, NIL." "Download a URL to a path; if successful, returns the pathname. Otherwise, NIL."
(handler-case (handler-case
; DEXADOR:FETCH returns nil on success, errors on fail. Hence the OR. (and (dexador:fetch (url-encode-uri url) path)
(or (dexador:fetch (url-encode-uri url) path) (pathname path))
(pathname path))
(cl-user::file-exists () (cl-user::file-exists ()
path) path)
(error nil))) (error nil)))

View File

@ -24,41 +24,29 @@
(defparameter *mirror-img-result* nil) (defparameter *mirror-img-result* nil)
(defun relative-pathname (path) (defun relative-pathname (path)
"Return an absolute path adding the relative PATH to the :mirror-img
systems path."
(asdf:system-relative-pathname :mirror-img/tests path)) (asdf:system-relative-pathname :mirror-img/tests path))
(defun run () (defun run ()
"Run all tests for the mirror-img package."
(let* ((doc-root (relative-pathname "t/testing-website/")) (let* ((doc-root (relative-pathname "t/testing-website/"))
(acceptor (make-instance 'hunchentoot:easy-acceptor (acceptor (make-instance 'hunchentoot:easy-acceptor
:port 4242 :port 4242
:document-root doc-root)) :document-root doc-root))
(server (hunchentoot:start acceptor))) (server (hunchentoot:start acceptor)))
(lisp-unit2:with-summary () (lisp-unit2:with-summary ()
(handler-case (run-tests :package :mirror-img/tests)
(progn (hunchentoot:stop server))))
(run-tests :package :mirror-img/tests)
(hunchentoot:stop server))
(error (c)
(hunchentoot:stop server)
c)))))
(define-test mirror-img (:tags '(mirror-img)) (define-test mirror-img (:tags '(mirror-img))
(assert-eq (assert-eq
'T 'T
(stringp (stringp
(setq *mirror-img-result* (setq *mirror-img-result*
(let* ((html-path (relative-pathname "t/testing-website/index.html")) (mirror-img:mirror-img (relative-pathname "t/testing-website/index.html")
(download-dir (relative-pathname "t/tmp/")) "tmp")))))
(url-dir (pathname-utils:relative-pathname
(relative-pathname "t/")
download-dir)))
(mirror-img:mirror-img html-path download-dir :url-dir url-dir))))))
(define-test mirror-img.files-mirrored (:tags '(mirror-img)) (define-test mirror-img.files-mirrored (:tags '(mirror-img))
(assert-equal (assert-equal
'("café" "classy_fireplace" "fireplace" "merry christmas!!! ^_^" "more_calming" "style" "welcoming you") '("café" "fireplace" "merry christmas!!! ^_^" "more_calming" "style" "welcoming you")
(sort (mapcar #'pathname-name (sort (mapcar #'pathname-name
(uiop:directory-files (relative-pathname "t/tmp/localhost/"))) (uiop:directory-files (relative-pathname "t/tmp/localhost/")))
#'string-lessp))) #'string-lessp)))
@ -146,4 +134,3 @@ systems path."
(assert-equal (assert-equal
" lak -!?ĉ_berries.png" " lak -!?ĉ_berries.png"
(mirror-img::pathname-leaf #p"/mom/dad?!?Ljla asdasd'' aslkdjĉ/apple/laksdjal/ lak -!?ĉ_berries.png"))) (mirror-img::pathname-leaf #p"/mom/dad?!?Ljla asdasd'' aslkdjĉ/apple/laksdjal/ lak -!?ĉ_berries.png")))