Tab-delimited output, rather than JSON

This is more UNIXy and line-processing friendly.
2023-09-13 00:09:41 -05:00 · 2023-09-13 00:09:41 -05:00 · 0819ed3987
parent cfc4d05622
commit 0819ed3987
3 changed files with 32 additions and 20 deletions
--- a/bookmarks-dl.sh
+++ b/bookmarks-dl.sh
@@ -84,10 +84,18 @@ source_start() {
 # ————————————————————————————————————————
 # MISC. UTILS
 # ————————————————————————————————————————
+# Trims preceding and trailing spaces of a string.
+# Be warned: Uses extended regexps, a GNUism!
+trim_spaces() {
+	sed -E 's%^[[:space:]]+%%g' \
+		| sed -E 's%[[:space:]]+$%%g'
+}
+
+
 # Given some HTML, return it’s plain-text and deescaped form.
 html_text_deescape() {
-	lynx -dump -stdin \
-		| xargs echo    # Trim trailing/preceding whitespace.
+	lynx -stdin -dump -nolist --assume_charset=utf8 --display_charset=utf8 \
+		| trim_spaces
 }


--- a/sources/mastodon.sh
+++ b/sources/mastodon.sh
@@ -25,7 +25,6 @@ fetch_bookmarks() {
 	local url="$3"
 	if test -z "$url"; then
 		url="https://$domain/api/v1/bookmarks?limit=40"
-		printf "[" # Start the JSON array
 	fi

 	local header_file="$(mktemp)"
@@ -37,8 +36,7 @@ fetch_bookmarks() {
 	local next_url="$(header_next_link "$header_file")"
 	rm "$header_file"
 	if test -n "$next_url"; then
-		fetch_bookmarks "$auth" "$domain" "$next_url" \
-			| sed 's/^},},/}]/' # Two },}, means end of JSON array
+		fetch_bookmarks "$auth" "$domain" "$next_url"
 	fi
 }

@@ -72,10 +70,16 @@ source_start() {

 # Given a page of /api/v1/bookmarks, parse into the simple bookmarks-dl format
 bookmarks_parse() {
-	jq -r '.[] | { "desc": .content, "href": .url, "added": .created_at }' \
-		| sed 's/^}/},/' \
-		| head -n-1
-	printf '},'
+	local bookmark_lines="$(jq -r '.[] | "\(.url)\t\t\(@json "\(.content)")\t\(.created_at)"')"
+	local IFS="
+"
+	for bookmark in $bookmark_lines; do
+		local url="$(echo "$bookmark" | awk -F '\t' '{print $1}')"
+		local date="$(echo "$bookmark" | awk -F '\t' '{print $4}')"
+		local desc="$(echo "$bookmark" | bookmark_line_desc)"
+		local title="$(echo "$desc" | head -c40)"
+		printf '%s\t%s\t%s\t%s\n' "$url" "$title" "$desc" "$date"
+    done
 }


@@ -89,3 +93,13 @@ header_next_link() {
 		| sed 's/>;rel="next",.*//' \
 		| sed 's/link:<//'
 }
+
+
+# Given a tab-delimited bookmark line, process HTML description into plain-text.
+bookmark_line_desc() {
+	awk -F '\t' '{print $3}' \
+		| sed 's%^"%%' \
+		| sed 's%"$%%' \
+		| html_text_deescape \
+		| tr '\n\t' '  '
+}
--- a/sources/pixiv.sh
+++ b/sources/pixiv.sh
@@ -37,9 +37,6 @@ fetch_bookmarks() {
 	local rest="$4"
 	if test -z "$offset"; then
 		offset=0
-		printf '[' # Start the JSON array…
-	else
-		printf ',' # Continue the array. (See printf in bookmarks_parse)
 	fi
 	# We want to download private *and* public bookmarks; start with private.
 	if test -z "$rest"; then
@@ -65,9 +62,6 @@ fetch_bookmarks() {
 	# When finished downloading private bookmarks, start downloading public ones.
 	elif test "$rest" = "hide"; then
 		fetch_bookmarks "$user_id" "$auth" "0" "show"
-	# When finished downloading all bookmarks, close the JSON array.
-	else
-		printf ']'
 	fi
 }

@@ -100,9 +94,5 @@ source_start() {


 bookmarks_parse() {
-	jq '.body.works[] | { "title": .title, "href": "https://www.pixiv.net/en/artworks/\(.id)", "desc": .alt }' \
-		| sed 's/^}/},/' \
-		| head -n-1
-	# The last element might be the last, so don’t add a comma after it.
-	printf '}'
+	jq -r '.body.works[] | "https://www.pixiv.net/en/artworks/\(.id)\t\(.title)\t\(.alt)\t"'
 }