chicken-vcarded/vcarded.scm

418 lines
16 KiB
Scheme
Raw Permalink Blame History

This file contains invisible Unicode characters

This file contains invisible Unicode characters that are indistinguishable to humans but may be processed differently by a computer. If you think that this is intentional, you can safely ignore this warning. Use the Escape button to reveal them.

This file contains Unicode characters that might be confused with other characters. If you think that this is intentional, you can safely ignore this warning. Use the Escape button to reveal them.

;; Copyright © 2024 Jaidyn Ann <jadedctrl@posteo.at>
;;
;; This program is free software: you can redistribute it and/or
;; modify it under the terms of the GNU General Public License as
;; published by the Free Software Foundation, either version 3 of
;; the License, or (at your option) any later version.
;;
;; This program is distributed in the hope that it will be useful,
;; but WITHOUT ANY WARRANTY; without even the implied warranty of
;; MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
;; GNU General Public License for more details.
;;
;; You should have received a copy of the GNU General Public License
;; along with this program. If not, see <https://www.gnu.org/licenses/>.
(module vcarded
(read-vcard write-vcard normalize-vcard)
(import
scheme
(chicken base)
(chicken condition)
(chicken io)
(chicken irregex)
(srfi 1)
(srfi 4)
(srfi 13)
(srfi 19)
(srfi 130)
(srfi 207)
(prefix uri-common uri:))
;;; ~ Macros ~
;; Ignore whatever conditions the expression might return; just return the
;; default value in that case.
;; (ignore-error (+ 1 "seven") 3) → 3
(define-syntax ignore-error
(syntax-rules ()
((ignore-error expr default)
(condition-case expr (var () default)))))
;;; ~ Exports ~
;; Reader thunk. Read/parse an entire vCard into a “vCard alist.”
;; vCard is read from an optional PORT, defaulting to (current-input-port).
;; Ignore the IGNORED parameter.
;;
;; The format of a cell is:
;; (PROPERTY-SYMBOL (ATTRIBUTE-STR …) PROPERTY-VALUE)
;;
;; PROPERTY-VALUE might be a string, a uri-common URI, an srfi-19 datetime,
;; an srfi-4 bytevector (u8vector), or a list of values.
;;
;; The type of the value follows logically from the vCard specifications
;; type for that property — URI properties are a uri-common URI and NICKNAME
;; is a list of strings, for example.
;;
;; But there is one notable exception: `data`-URIs are parsed into bytevectors
;; for convenience, as these are often used to encode profile pictures.
;;
;; Anyway, here is an example vCard alist made by this function:
;; ((BEGIN () "VCARD")
;; (VERSION () "3.0")
;; (FN () "Birdie Johnson")
;; (NICKNAME () ("Bird" "Gal" "Tweety"))
;; (TEL ("type=home") #<URI-common: scheme=tel path=("12149989852")>)
;; (END () "VCARD"))
(define (read-vcard #!optional (port (current-input-port)) (ignored #t))
(let* [(first-element? ignored)
(element (read-vcard-element port first-element?))]
(if (not (eof-object? (peek-char port)))
(append (list element) (read-vcard port #f))
(list element))))
;; Writes a vCards parsed alist to the output port as a string.
;; Defaults to (current-output-port).
;;
;; I wish I had more to say, especially compared to READ-VCARDs essay-long
;; description, but unfortunately Im coming up empty! =w=,
(define (write-vcard vcard-alist #!optional (port (current-output-port)))
(for-each (lambda (element)
(write-folded-line (serialize-vcard-element element) port))
vcard-alist))
;; Returns a “normalized” version of the given VCARD-ALIST.
;; Will make sure that required properties (“BEGIN” and “END”) are included,
;; and optionally update the PRODID and VERSION properties.
(define (normalize-vcard vcard-alist #!optional
(update-prod-id? #t) (update-version? #t))
(let ([empty-alist? (or (null? vcard-alist)
(eq? (length vcard-alist) 1))])
(when update-version?
(set! vcard-alist
(alist-update 'VERSION (list '() "4.0") vcard-alist)))
(when update-prod-id?
(set! vcard-alist
(alist-update 'PRODID (list '() ".//jadedctrl//chicken-vcarded") vcard-alist)))
(when (or empty-alist?
(not (eq? (caar vcard-alist) 'BEGIN)))
(set! vcard-alist
(append '((BEGIN () "VCARD"))
(remove (lambda (a) (eq? (car a) 'BEGIN))
vcard-alist))))
(when (or empty-alist?
(not (eq? (car (last vcard-alist)) 'END)))
(set! vcard-alist
(append
(remove (lambda (a) (eq? (car a) 'END)) vcard-alist)
'((END () "VCARD")))))
vcard-alist))
;;; ~ vCard parsing [string→alist] ~
;; Read a single unfolded line into a vCard “element” list.
;; The optional FIRST-ELEMENT? argument simply makes sure the next line is a
;; BEGIN:VCARD element (as the first element in any valid vCard file should
;; be. It is used internally by READ-VCARD.
(define (read-vcard-element #!optional (port (current-input-port)) (first-element? #f))
(let* [(line (read-folded-line (if first-element? 100 #f) port))]
(cond
;; Ignore blank lines, theyre not doing anyone any harm!
[(or (string-null? line)
(irregex-search "^[[:whitespace:]]+$" line))
(read-vcard-element port first-element?)]
;; If weve previously established this is a vCard stream were dealing
;; with, just go ahead and parse. If we havent established that, make
;; sure its a BEGIN:VCARD line first.
[(or (not first-element?)
(and first-element?
(string-contains line "BEGIN")
(string-contains line "VCARD")))
(parse-vcard-line line)]
;; If were still making sure the file is vCard, and the first non-blank
;; line *isnt* a BEGIN:VCARD line, then the file probably isnt a vCard.
[#t
(signal
(condition '(exn location vcarded message
"Not a valid vCard file.")
'(file)
'(vcard)))])))
;; Parse a line of a vcard file into an alist-friendly format:
;; (KEY ("keyprop1=d" "keyprop2=b") "VALUE" ("valprop1=a" "valprop2=b"))
(define (parse-vcard-line line)
(let* [(prop-value-strings (split-vcard-line line))
(prop-elements (string-split-unescaped (car prop-value-strings) ";"))
(value-elements (cdr prop-value-strings))
(property (string->symbol (string-upcase (car prop-elements))))]
(append (parse-vcard-property property (cdr prop-elements))
(parse-vcard-value property value-elements))))
;; Splits a line into a cons of the property-string and value-string.
;; … basically splits the string along the first unescaped colon (:).
;; "apple\:berry:mom:dad" → ("apple\:berry" . "mom:dad")
(define (split-vcard-line line)
(let [(split (string-split-unescaped line ":"))]
(cons (car split)
(string-join (cdr split) ":"))))
;; Given a vCard property and its values (e.g., “VERSION” and “3.0”),
;; parse them into a list.
;; "EMAIL;TYPE=home" "mom@dad.com" → '(EMAIL ("TYPE=home") "mom@dad.com")
(define (parse-vcard-property property elements)
(append (list property)
(list elements)))
;; Given the value(s) of a vCard elements value string(s), returned a
;; parsed object.
;; 'BIRTHDAY "2024-01-02T00:00:00" → #@2024-01-02T00:00:00-0600
(define (parse-vcard-value prop elements)
(let [(parser-and-unparser-funcs (alist-ref prop vcard-value-parsers))]
(if parser-and-unparser-funcs
(list (ignore-error (apply (car parser-and-unparser-funcs)
(list elements))
elements))
(list elements))))
;;; ~ vCard serialization [alist→string] ~
;; With an element of a parsed vCard alist, serialize it (back) into a
;; string.
(define (serialize-vcard-element element)
(let [(property (string-join
(append (list (symbol->string (car element)))
(second element)) ";"))
(value (serialize-vcard-value (car element) (last element)))]
(string-join (list property value) ":")))
;; Serialize the value of a vCard property (from a parsed vCard alist)
;; into a string.
(define (serialize-vcard-value prop value)
(let [(parser-and-unparser-funcs (alist-ref prop vcard-value-parsers))]
(if parser-and-unparser-funcs
(apply (cadr parser-and-unparser-funcs) (list value))
value)))
;;; ~ Property-value parsers/serializers ~
;; List of all properties with semicoloned-structured strings.
(define vcard-semicoloned-properties
'(ADR CLIENTPIDMAP GENDER N))
;; List of all properties with datetime values.
(define vcard-datetime-properties
'(ANNIVERSARY BDAY REV))
;; List of all properties with multiple comma-separated values.
(define vcard-csv-properties
'(CATEGORIES NICKNAME))
;; List of all URL-type vcard properties.
(define vcard-url-properties
'(CALADRURI CALURI FBURL GEO IMPP
KEY LOGO MEMBER PHOTO RELATED
SOUND SOURCE TEL UID URL))
;; Should parse any truncated & reduced-accuracy ISO 8601 datetime.
;; … right now, it only parses a few possibilities.
(define (string->any-date str)
(let* [(ymd "~Y~m~d")
(hms "~H~M~S")
(ymd-hms (string-join (list ymd hms) "T"))]
(or (ignore-error (string->date str ymd-hms) #f)
(ignore-error (string->date str ymd) #f)
(ignore-error (string->date str hms) #f))))
;; Parse a string into a URI object.
(define (string->uri string)
(uri:uri-reference string))
;; Parse a data URIs contents into a list containing the mimetype (string)
;; and bytevector contents (u8vector).
;; "data:text/plain;base64,ZGFk" → ("text/plain;base64" #u8(100 97 100))
(define (data-uri-str->bytevector uri)
(let* [(str (string-drop uri (string-length "data:")))
(split (string-split str ","))
(mimetype (car split))
(contents (string-join (cdr split) ","))]
(list
mimetype
(if (string-contains mimetype "base64")
(base64->bytevector contents)
(list->u8vector
(map char->integer
(string->list contents)))))))
;; Given a bytevector and according mimetype, return the associated
;; data-URI.
;; "text/plain;base64" #u8(100 97 100) → "data:text/plain;base64,ZGFk"
(define (bytevector->data-uri-str mimetype bytevector)
(string-concatenate
(list
"data:"
mimetype
","
(if (string-contains mimetype "base64")
(bytevector->base64 bytevector)
(list->string
(map integer->char
(u8vector->list bytevector)))))))
;; Given a URI propertys value string(s) (like PHOTO or URL), return either
;; a URI (if URI is valid), string (if invalid), or u8vector/bytevector (if
;; data-URI).
(define (parse-uri-prop-value . strings)
(let* [(str (string-join strings ";"))
;; We parse data URIs ourselves; so lets not be redundant.
(uri (if (not (string-prefix? "data:" str))
(string->uri str)))]
(cond
[(string-prefix? "data:" str)
(data-uri-str->bytevector str)]
[(not (uri:uri? uri))
str]
[#t
uri])))
;; Given a parsed URI propertys value, serialize back into the original
;; string-form… as best we can.
(define (serialize-uri-prop-value value)
(cond [(uri:uri? value)
(irregex-replace
":./"
(uri:uri->string value)
":")] ;; Some URIs have “./” mistakenly prepended.
[(list? value)
(bytevector->data-uri-str (car value) (cadr value))]
[(string? value)
value]
[#t
""]))
;; A list of the parser/serializer functions for each vcard property.
;; ((TEL #<procedure> #<procedure>)
;; (ADR #<procedure> #<procedure>)
;; …)
;; TODO: Add a parser for the TZ [timezone] property.
(define vcard-value-parsers
(append
(map (lambda (uri-prop)
(list uri-prop
parse-uri-prop-value
serialize-uri-prop-value))
vcard-url-properties)
(map (lambda (date-prop)
(list date-prop
string->any-date
(lambda (datetime)
(if (date? datetime)
(date->string datetime "~Y~m~dT~H~M~S~z")
datetime))))
vcard-datetime-properties)
(map (lambda (csv-prop)
(list csv-prop
(lambda (str) (string-split-unescaped str ",")
(lambda (csv-list) (string-join csv-list ",")))))
vcard-csv-properties)
(map (lambda (semicolon-prop)
(list semicolon-prop
(lambda (str) (string-split-unescaped str ";"))
(lambda (sc-list) (string-join sc-list ";"))))
vcard-semicoloned-properties)))
;;; ~ General utilities ~ ^^
;; Reader-thunk. Read a “logical” folded-line, where a line beginning with a
;; space is a continuation of the previous line — like with vcards.
;; LIMIT is the maximum amount of characters it will read before giving up
;; on finding a newline.
;; It will default to (current-input-port) as the PORT.
(define (read-folded-line #!optional (limit #f) (port (current-input-port)))
(let [(port (or port (current-input-port)))]
(if (not (eof-object? (peek-char port)))
(let [(line (read-line port limit))]
(if (or (eq? (peek-char port) #\space)
(eq? (peek-char port) #\tab))
(string-concatenate
(list line
(string-drop (read-folded-line limit port) 1)))
line))
"")))
;; Write a line to a port as a “logical” folded-line string, in the style of
;; vCard and vCalendar. If the line is too long (passed FOLD-LENGTH [70]), it
;; will be written as several lines of FOLD-LENGTH, with each such line
;; prepended by the CREASE-CHAR (#\tab).
;; It will default to (current-output-port), if PORT is #f or unspecified.
(define (write-folded-line string #!optional
(port (current-output-port))
(fold-length 70)
(crease-char #\tab))
(write-string
(string-concatenate
(list (string-join (string-split-by-lengths string fold-length)
(list->string (list #\return #\newline crease-char)))
(list->string '(#\return #\newline))))
#f (or port (current-output-port))))
;; Split a STRING into strings of a size of (at most) LENGTH signs.
;; ("ÁPPLE" 2) → '("ÁP" "PL" "E")
(define (string-split-by-lengths string length)
(append (list (if (<= (string-length string) length)
string
(substring string 0 length)))
(if (<= (string-length string) length)
'()
(string-split-by-lengths (substring string length) length))))
;; Splits a string along a delimiter; while not splitting along
;; backslash-escaped delimiters. With “;” as the delimiter:
;; "Bird;dad;apple\;mom" → ("Bird" "dad" "apple\;mom")
(define (string-split-unescaped string delimiter)
(let [(appendee "")]
(remove
not
(map (lambda (substr)
(let [(substr (string-concatenate (list appendee substr)))]
(if (and (not (string-null? substr))
(eq? (last (string->list substr))
#\\))
(and (set! appendee
(string-concatenate (list substr delimiter)))
#f)
(and (set! appendee "") substr))))
(string-split string delimiter))))))