Tab-delimited output, rather than JSON

This is more UNIXy and line-processing friendly.
This commit is contained in:
Jaidyn Ann 2023-09-13 00:09:41 -05:00
parent cfc4d05622
commit 0819ed3987
3 changed files with 32 additions and 20 deletions

View File

@ -84,10 +84,18 @@ source_start() {
# ————————————————————————————————————————
# MISC. UTILS
# ————————————————————————————————————————
# Trims leading and trailing whitespace from every line read on stdin and
# writes the trimmed lines to stdout. Whitespace inside a line is untouched.
# Uses only basic regular expressions in a single sed invocation, so it does
# not depend on the local sed supporting -E (extended regexps).
trim_spaces() {
	sed -e 's%^[[:space:]]*%%' -e 's%[[:space:]]*$%%'
}
# Given some HTML, return its plain-text and deescaped form.
# The HTML is read from stdin (lynx -stdin) and the rendered text is written
# to stdout.
# NOTE(review): this listing looks like a diff with its +/- markers stripped,
# so two lynx pipelines appear below. Presumably only the second one (ending
# in trim_spaces) is live after this commit — confirm against the real file.
html_text_deescape() {
lynx -dump -stdin \
| xargs echo # Trim trailing/preceding whitespace.
# Dump the rendered page without the trailing link list (-nolist), with both
# the assumed input charset and the display charset set to UTF-8, then strip
# leading/trailing whitespace from each line.
lynx -stdin -dump -nolist --assume_charset=utf8 --display_charset=utf8 \
| trim_spaces
}

View File

@ -25,7 +25,6 @@ fetch_bookmarks() {
local url="$3"
if test -z "$url"; then
url="https://$domain/api/v1/bookmarks?limit=40"
printf "[" # Start the JSON array
fi
local header_file="$(mktemp)"
@ -37,8 +36,7 @@ fetch_bookmarks() {
local next_url="$(header_next_link "$header_file")"
rm "$header_file"
if test -n "$next_url"; then
fetch_bookmarks "$auth" "$domain" "$next_url" \
| sed 's/^},},/}]/' # Two },}, means end of JSON array
fetch_bookmarks "$auth" "$domain" "$next_url"
fi
}
@ -72,10 +70,16 @@ source_start() {
# Given a page of /api/v1/bookmarks, parse into the simple bookmarks-dl format
# The page's JSON is read from stdin. The loop below prints one line per
# bookmark with four tab-separated fields: URL, title, description, date.
# NOTE(review): this listing looks like a diff with its +/- markers stripped;
# the first jq pipeline and the `printf '},'` after it are presumably the
# removed JSON-emitting lines — confirm against the real file.
bookmarks_parse() {
jq -r '.[] | { "desc": .content, "href": .url, "added": .created_at }' \
| sed 's/^}/},/' \
| head -n-1
printf '},'
# Build one line per bookmark: URL, an empty second field, the content as a
# JSON-quoted string (@json), and the creation date, separated by tabs.
local bookmark_lines="$(jq -r '.[] | "\(.url)\t\t\(@json "\(.content)")\t\(.created_at)"')"
# Split the loop's word list on newlines only, so each bookmark line stays
# in one piece despite the tabs and spaces it contains.
local IFS="
"
for bookmark in $bookmark_lines; do
# Field 1 is the URL and field 4 the date; field 3 (the description) is
# converted to plain text by bookmark_line_desc.
local url="$(echo "$bookmark" | awk -F '\t' '{print $1}')"
local date="$(echo "$bookmark" | awk -F '\t' '{print $4}')"
local desc="$(echo "$bookmark" | bookmark_line_desc)"
# The title is the first 40 bytes of the description.
local title="$(echo "$desc" | head -c40)"
printf '%s\t%s\t%s\t%s\n' "$url" "$title" "$desc" "$date"
done
}
@ -89,3 +93,13 @@ header_next_link() {
| sed 's/>;rel="next",.*//' \
| sed 's/link:<//'
}
# Given tab-delimited bookmark lines on stdin, print the description (third
# field) as plain text on stdout.
# The third field is expected to be a JSON-quoted string of HTML, as produced
# by jq's @json. It is decoded with jq rather than by stripping the outer
# quotes, so escapes inside it (\" \\ \n ...) are turned back into the real
# characters before the HTML is rendered by html_text_deescape.
# Newlines and tabs in the rendered text are replaced by spaces so the result
# fits in a single tab-delimited field. tr is given one replacement character
# per source character, since a shorter second string is unspecified by POSIX.
bookmark_line_desc() {
	awk -F '\t' '{print $3}' \
		| jq -r '.' \
		| html_text_deescape \
		| tr '\n\t' '  '
}

View File

@ -37,9 +37,6 @@ fetch_bookmarks() {
local rest="$4"
if test -z "$offset"; then
offset=0
printf '[' # Start the JSON array…
else
printf ',' # Continue the array. (See printf in bookmarks_parse)
fi
# We want to download private *and* public bookmarks; start with private.
if test -z "$rest"; then
@ -65,9 +62,6 @@ fetch_bookmarks() {
# When finished downloading private bookmarks, start downloading public ones.
elif test "$rest" = "hide"; then
fetch_bookmarks "$user_id" "$auth" "0" "show"
# When finished downloading all bookmarks, close the JSON array.
else
printf ']'
fi
}
@ -100,9 +94,5 @@ source_start() {
# Reads a JSON document with a .body.works array on stdin and prints its
# entries to stdout.
# NOTE(review): this listing looks like a diff with its +/- markers stripped;
# the first jq pipeline and the `printf '}'` are presumably the removed
# JSON-emitting lines, and the final `jq -r` line their replacement — confirm
# against the real file.
bookmarks_parse() {
jq '.body.works[] | { "title": .title, "href": "https://www.pixiv.net/en/artworks/\(.id)", "desc": .alt }' \
| sed 's/^}/},/' \
| head -n-1
# This element might be the last of the whole array, so don't add a comma
# after it.
printf '}'
# One line per work: artwork URL, title, alt text, and an empty trailing
# field, separated by tabs.
jq -r '.body.works[] | "https://www.pixiv.net/en/artworks/\(.id)\t\(.title)\t\(.alt)\t"'
}