fedi2html/fedi2html
Jaidyn Ann 8ca6350dda Make shellcheck (slightly) more happy
… though `dash` still doesn’t work. ;<
2024-10-31 14:46:26 -05:00

461 lines
12 KiB
Bash
Executable File
Raw Permalink Blame History

This file contains ambiguous Unicode characters

This file contains Unicode characters that might be confused with other characters. If you think that this is intentional, you can safely ignore this warning. Use the Escape button to reveal them.

#!/bin/sh
#―――――――――――――――――――――――――――――――――――――――――――――――――――――――――――――――――――――――――――――――
# Name: fedi2html
# Desc: Render a post from the fediverse into simple HTML.
# Auth: Jaidyn Ann <jadedctrl@posteo.at>
# Lisc: GNU GPLv3
# Reqs: curl, jq
# Date: 2024-03
#―――――――――――――――――――――――――――――――――――――――――――――――――――――――――――――――――――――――――――――――
# Default layout for one rendered post. It is only installed when the caller
# has not already supplied a non-empty $POST_TEMPLATE. The text is
# single-quoted, so every $NAME and the $(date ...) call stay literal here;
# render_post hands the template to env_subst, which expands them.
[ -n "$POST_TEMPLATE" ] || POST_TEMPLATE='
<article class="comment">
<a class="user" href="$ACCOUNT_URL">
<img class="avatar" src="$ACCOUNT_AVATAR">
<strong class="username">$ACCOUNT_NAME</strong>
<em class="useraddress">$ACCOUNT_ID</em>
</a>
<a class="address" href="$POST_URL">
<time title="$POST_DATE">$(date --date=$POST_DATE)</time>
</a>
<section>
$POST_CONTENT
</section>
<div class="attachments">
$POST_ATTACHMENTS
</div>
<div class="responses">
$POST_RESPONSES
</div>
</article>
'
# Default layout for a non-image attachment; kept only as a fallback for when
# $ATTACH_TEMPLATE is unset or empty. media_attachments expands it through
# env_subst once per attachment.
[ -n "$ATTACH_TEMPLATE" ] || ATTACH_TEMPLATE='
<div class="attachment">
<a href="$ATTACH_URL"><strong>$ATTACH_NAME</strong></a>
$ATTACH_DESC
</div>
'
# Default layout for an image attachment; used by media_attachments when the
# attachment type is "image" and $ATTACH_IMAGE_TEMPLATE was not provided.
# The embedded $(if ...) shortens long descriptions for the caption.
# NOTE: the single quotes around the sed script below end and re-open the
# surrounding literal, so the stored template holds that sed expression
# without any quotes around it.
[ -n "$ATTACH_IMAGE_TEMPLATE" ] || ATTACH_IMAGE_TEMPLATE='
<figure class="attachment">
<a href="$ATTACH_URL">
<img src="$ATTACH_URL" alt="$ATTACH_DESC" title="$ATTACH_DESC">
</a>
<figcaption>
<a href="$ATTACH_URL"><b>$ATTACH_NAME</b></a><br>
$(if test $(echo "$ATTACH_DESC" | wc -c) -gt 52; then
echo $ATTACH_DESC | head -c 53 | sed 's%[[:space:]]*$%%'
echo …
else
echo $ATTACH_DESC
fi)
</figcaption>
</figure>
'
# Default markup for one custom emoji; replace_emojis expands it for every
# emoji of a post. Left alone when $EMOJI_TEMPLATE is already non-empty.
[ -n "$EMOJI_TEMPLATE" ] || EMOJI_TEMPLATE='<img class="emoji" src="$EMOJI_URL" alt="$EMOJI_SHORTCODE" title="$EMOJI_SHORTCODE">'
# Given a post's JSON, render it as text by filling in $POST_TEMPLATE.
# The most important part of the script!
# Arguments:
#   $1  JSON of the post
#   $2  response data, passed on unchanged to render_responses
#   $3  depth of this post in the reply tree (kept in $POST_TREE_LEVEL)
# If the post is a reblog, the account fields are taken from .reblog.account,
# otherwise from .account; all other fields are read from the post itself.
# The result of expanding $POST_TEMPLATE is written to stdout.
# render_post $post_data $context_data $tree_level
render_post() (
    post_data="$1"
    responses_data="$2"
    POST_TREE_LEVEL="$3"

    # printf, not echo: JSON is full of backslash escapes, and some /bin/sh
    # echo built-ins (dash, for one) would interpret them and corrupt the data.
    # $reblog is not read again in this function; it stays set while the
    # template is expanded below.
    reblog=""
    acct_data="$(printf '%s\n' "$post_data" | jq -r .reblog.account)"
    if test "$acct_data" = "null"; then
        acct_data="$(printf '%s\n' "$post_data" | jq -r .account)"
    else
        reblog="1"
    fi

    # These upper-case names are the ones templates may refer to.
    ACCOUNT_URL="$(printf '%s\n' "$acct_data" | jq -r .url)"
    ACCOUNT_ID="$(printf '%s\n' "$acct_data" | jq -r .fqn)"
    ACCOUNT_NAME="$(printf '%s\n' "$acct_data" | jq -r .display_name \
        | replace_emojis "$acct_data")"
    ACCOUNT_AVATAR="$(printf '%s\n' "$acct_data" | jq -r .avatar)"
    POST_URL="$(printf '%s\n' "$post_data" | jq -r .url)"
    POST_DATE="$(printf '%s\n' "$post_data" | jq -r .created_at)"
    POST_CONTENT="$(printf '%s\n' "$post_data" | jq -r .content \
        | replace_emojis "$post_data")"
    POST_ATTACHMENTS="$(media_attachments "$post_data")"

    # Responses are rendered (recursively) unless $NO_RESPONSES is set.
    if test -z "$NO_RESPONSES"; then
        POST_RESPONSES="$(render_responses "$post_data" "$responses_data" "$POST_TREE_LEVEL")"
    fi

    env_subst "$POST_TEMPLATE"
)
# Render a post's responses one-by-one and recursively.
# Each branch of the response tree will be rendered completely before
# proceeding to the next.
# Arguments:
#   $1  JSON of the parent post
#   $2  response data: a stream of JSON objects, one per response
#   $3  tree level of the parent (defaults to 0); children get level + 1
# render_responses $post_data $context_data $tree_level
render_responses() (
    post_data="$1"
    responses_data="$2"
    level="${3:-0}"

    id="$(printf '%s\n' "$post_data" | jq -r '.id')"

    # Compare the in_reply_to_id field itself. A textual search for the ID
    # would also hit posts that merely contain it elsewhere on the line, or
    # whose own IDs have it as a substring.
    # Lines are read with `read -r` so the JSON is never word-split or globbed.
    printf '%s\n' "$responses_data" \
        | jq -c --arg id "$id" \
            'select(.in_reply_to_id != null and (.in_reply_to_id | tostring) == $id)' \
        | while IFS= read -r response; do
            # Keep the loop's input away from anything render_post runs.
            render_post "$response" "$responses_data" "$((level + 1))" < /dev/null
        done
)
# Accepts a string over stdin; it will replace all emoji shortcodes
# (:shortcode:) in that input with text generated from $EMOJI_TEMPLATE, using
# the emoji list (.emojis) of the JSON given as $1. Output goes to stdout.
# While the template is expanded, $EMOJI_URL and $EMOJI_SHORTCODE are set.
# echo ":blobcat:" | replace_emojis $post_data
replace_emojis() (
    post_data="$1"
    emojis="$(printf '%s\n' "$post_data" \
        | jq -r '.emojis[]? | (.url + "\t" + .shortcode)')"

    # Build a single sed script with one `s` command per emoji, then run sed
    # once over stdin. This needs neither a temporary file nor `sed -i`.
    sed_script="$(
        printf '%s\n' "$emojis" | while IFS= read -r line; do
            test -n "$line" || continue
            EMOJI_URL="$(printf '%s\n' "$line" | awk -F'\t' '{print $1}')"
            EMOJI_SHORTCODE="$(printf '%s\n' "$line" | awk -F'\t' '{print $2}')"
            test -n "$EMOJI_SHORTCODE" || continue

            # Match the shortcode literally: escape regex characters and the
            # % delimiter.
            pattern="$(printf '%s\n' "$EMOJI_SHORTCODE" \
                | sed 's/[][\\.*^$%]/\\&/g')"
            # In the replacement, backslash, & and the % delimiter are special
            # to sed (URLs often contain % and &); embedded newlines need a
            # trailing backslash.
            value="$(env_subst "$EMOJI_TEMPLATE" < /dev/null \
                | sed -e 's/[\\&%]/\\&/g' -e '$!s/$/\\/')"

            printf 's%%:%s:%%%s%%g\n' "$pattern" "$value"
        done
    )"

    if test -n "$sed_script"; then
        sed "$sed_script"
    else
        # No emojis: pass the input through untouched.
        cat
    fi
)
# Given a post's JSON data, print the text corresponding to its media
# attachments, if any. Prints nothing if there are none.
# Each attachment is rendered with $ATTACH_IMAGE_TEMPLATE when its type is
# "image" and with $ATTACH_TEMPLATE otherwise. While a template is expanded,
# $ATTACH_TYPE, $ATTACH_URL, $ATTACH_DESC, $ATTACH_PREVIEW and $ATTACH_NAME
# (the basename of the URL) are set.
# media_attachments $post_data
media_attachments() (
    post_data="$1"

    # One attachment per line, fields separated by tabs. Newlines, carriage
    # returns and tabs inside a field (multi-line descriptions are common)
    # are turned into spaces first, so they cannot break this framing.
    attachments="$(printf '%s\n' "$post_data" | jq -r '
        def flat: (. // "") | tostring
            | split("\n") | join(" ")
            | split("\r") | join(" ")
            | split("\t") | join(" ");
        .media_attachments[]?
        | [(.type | flat), (.url | flat), (.description | flat), (.preview_url | flat)]
        | join("\t")')"

    # `read -r` keeps each line intact (no word-splitting, no globbing).
    printf '%s\n' "$attachments" | while IFS= read -r line; do
        test -n "$line" || continue
        ATTACH_TYPE="$(printf '%s\n' "$line" | awk -F'\t' '{print $1}')"
        ATTACH_URL="$(printf '%s\n' "$line" | awk -F'\t' '{print $2}')"
        ATTACH_DESC="$(printf '%s\n' "$line" | awk -F'\t' '{print $3}')"
        ATTACH_PREVIEW="$(printf '%s\n' "$line" | awk -F'\t' '{print $4}')"
        ATTACH_NAME="$(basename "$ATTACH_URL")"

        if test "$ATTACH_TYPE" = "image"; then
            env_subst "$ATTACH_IMAGE_TEMPLATE" < /dev/null
        else
            env_subst "$ATTACH_TEMPLATE" < /dev/null
        fi
    done
)
# Pass a post's context JSON along stdin; out comes the response data: the
# members of .descendants sorted by .created_at (order possibly reversed by
# maybe_jq_reverse), printed as one compact JSON object per line.
# fetch_post_context $url | context_to_responses
context_to_responses() (
    # `// []`: a body without .descendants (an error object, for instance)
    # counts as "no responses" instead of making sort_by fail on null.
    # Errors on input that is not usable JSON stay silenced, as before.
    jq '(.descendants // []) | sort_by(.created_at)' 2> /dev/null \
        | maybe_jq_reverse \
        | jq -cr '.[]'
)
# Query the /api/v1/statuses/:id[/$request] endpoint of the server a post
# lives on, and print the response body.
#   $1  URL of the post; server and post ID are derived from it
#   $2  optional sub-resource (e.g. "context"); omitted → the status itself
# statuses_api_request $post_url $request
statuses_api_request() (
    status_url="$1"
    # "/<request>" when a request was named, otherwise nothing at all.
    suffix="${2:+/$2}"
    endpoint="$(url_server "$status_url")/api/v1/statuses/$(url_post_id "$status_url")${suffix}"

    curl --retry 3 --retry-delay 5 \
        --location \
        --header 'Accept: application/json,application/activity+json' \
        "$endpoint"
)
# Request the context of a post, by URL, and convert it straight into
# response data (see context_to_responses).
# fetch_post_context $url
fetch_post_context() (
    statuses_api_request "$1" "context" | context_to_responses
)
# Request the JSON of a single post, by URL.
# fetch_post $url
fetch_post() (
    statuses_api_request "$1"
)
# Given a user-account URL, request that account's posts and print them as
# one compact JSON object per line.
# The query is shaped by $EXCLUDE_REBLOGS, $EXCLUDE_REPLIES, $MAX_POSTS and
# $TAG_FILTER (the latter is appended verbatim).
# fetch_user_posts $url
fetch_user_posts() (
    account_url="$1"

    query="exclude_reblogs=${EXCLUDE_REBLOGS}&limit=${MAX_POSTS}"
    query="${query}&exclude_replies=${EXCLUDE_REPLIES}${TAG_FILTER}"
    endpoint="$(url_server "$account_url")/api/v1/accounts/$(url_user_id "$account_url")/statuses?${query}"

    curl --retry 3 --retry-delay 5 \
        --location \
        --header 'Accept: application/json,application/activity+json' \
        "$endpoint" \
        | jq -c '.[]'
)
# Print the ID (handle) of a user, based on the account's URL.
# Returns 1 and prints nothing if the URL does not look like a user URL.
# Trailing slashes are never part of the result.
# url_user_id $url
url_user_id() (
    url="$1"
    # Pleroma-style URLs: https://jam.xwx.moe/users/Tirifto
    # Mastodon-style URLs: https://esperanto.masto.host/@jubiloEO
    case "$url" in
    */users/*)
        printf '%s\n' "$url" \
            | sed -e 's%.*/users/%%' -e 's%/*$%%'
        ;;
    *)
        # The handle must be the last path component, which keeps post URLs
        # (/@name/12345) out. Besides letters and digits, handles may contain
        # "_", "." and "-".
        if printf '%s\n' "$url" | grep -E -q '/@[[:alnum:]_.-]+/*$'; then
            printf '%s\n' "$url" \
                | sed -e 's%.*/@%%' -e 's%/*$%%'
        else
            return 1
        fi
        ;;
    esac
)
# Print the ID of a post, based on its URL.
# Returns 1 and prints nothing if no post ID can be found in the URL.
# Only the ID itself is printed: a trailing slash, further path components,
# a query string or a fragment are left out.
# url_post_id $url
url_post_id() (
    url="$1"
    # Pleroma-style URLs: https://jam.xwx.moe/notice/Ac6PIZAP0ZzkMTYBBg
    # Mastodon-style URLs: https://esperanto.masto.host/@minjo/111461250815264185
    case "$url" in
    */notice/*)
        extract='s%^.*/notice/\([^/?#]\{1,\}\).*$%\1%p'
        ;;
    *)
        # Handles may contain "_", "." and "-" besides letters and digits;
        # the ID after them is purely numeric.
        extract='s%^.*/@[[:alnum:]_.-]\{1,\}/\([[:digit:]]\{1,\}\).*$%\1%p'
        ;;
    esac

    # With -n, sed prints only when the substitution matched, so an empty
    # result means "not a post URL".
    id="$(printf '%s\n' "$url" | sed -n "$extract")"
    test -n "$id" || return 1
    printf '%s\n' "$id"
)
# Print the server (including protocol, if the URL has one) of a post or
# account, based on its URL: everything up to the first "/" of the path.
# url_server https://jam.xwx.moe/notice/abc → https://jam.xwx.moe
# url_server $url
url_server() (
    url="$1"
    protocol=""
    rest="$url"

    # Only the text before the FIRST "://" can be the protocol, and only if
    # it looks like a scheme. A later "://" (say, inside a query string) must
    # not be mistaken for one.
    case "$url" in
    *://*)
        scheme="${url%%://*}"
        case "$scheme" in
        *[![:alnum:]+.-]*)
            ;;
        *)
            protocol="${scheme}://"
            rest="${url#*://}"
            ;;
        esac
        ;;
    esac

    # Drop the path: everything from the first remaining slash onwards.
    printf '%s%s\n' "$protocol" "${rest%%/*}"
)
# Sanitize a template-string for use inside a double-quoted shell word.
# AKA, put a backslash in front of every double quotation-mark. Nothing else
# is changed. The result is printed to stdout.
# prep_template $template
prep_template() (
    template="$1"
    # printf, not echo: echo would swallow a template such as "-n", and some
    # /bin/sh echo built-ins rewrite backslash sequences.
    printf '%s\n' "$template" \
        | sed 's%"%\\"%g'
)
# Rough replacement for gettext's envsubst.
# The template is evaluated by the shell as the contents of a double-quoted
# string: $NAME is replaced by the variable's value, and $(...) in the
# template itself is executed. Substitution only happens along the "first
# level": the values of the variables (AKA, post contents) are inserted as-is
# and are not evaluated again.
# NOTE(review): the template itself is run as shell code, so it must come
# from a trusted source (the user's own environment, or the defaults).
# env_subst $template
# env_subst "$SHELL" → "/bin/sh"
env_subst() (
    template="$1"
    quoted="$(prep_template "$template")"
    # printf, not echo: the expanded text contains post contents, and some
    # /bin/sh echo built-ins would interpret backslash sequences in it.
    eval "printf '%s\n' \"$quoted\""
)
# Based on the environment variable $REVERSE_ORDER (whether or not the user
# provided the -I flag), reverse the JSON array read from stdin; when the
# variable is empty, stdin is passed through unchanged.
# This is used to enable/disable reverse-chronological order of posts.
# fetch_context $url | jq '.descendants' | maybe_jq_reverse
maybe_jq_reverse() (
    # Stream stdin directly instead of storing it and printing it with echo,
    # so backslash escapes in the JSON can never be rewritten on the way.
    if test -n "$REVERSE_ORDER"; then
        jq 'reverse'
    else
        cat
    fi
)
# Render a user's posts, one-by-one, taking into account cli arguments:
#   $NO_RESPONSES set → each post is rendered without fetching its context
#   $NO_PARENT set    → only the responses of each post are rendered
#   neither           → each post is rendered together with its responses
# handle_user_url https://jam.xwx.moe/users/tirifto
handle_user_url() (
    url="$1"
    user_posts="$(fetch_user_posts "$url")"

    # One post (compact JSON) per line. `read -r` keeps each line intact,
    # without word-splitting or globbing.
    printf '%s\n' "$user_posts" | while IFS= read -r post; do
        test -n "$post" || continue
        # Keep the loop's input away from the commands run for each post.
        {
            if test -n "$NO_RESPONSES"; then
                render_post "$post" "" 0
            else
                post_url="$(printf '%s\n' "$post" | jq -r '.url')"
                context="$(fetch_post_context "$post_url")"
                if test -n "$NO_PARENT"; then
                    render_responses "$post" "$context" 0
                else
                    render_post "$post" "$context" 0
                fi
            fi
        } < /dev/null
    done
)
# Render a post and/or its responses, taking into account cli arguments:
#   $NO_PARENT set    → only the post's responses are rendered
#   $NO_RESPONSES set → only the post is rendered
#   neither           → the post is rendered together with its responses
# handle_post_url $url
handle_post_url() (
    url="$1"
    post="$(fetch_post "$url")"

    if test -n "$NO_PARENT"; then
        render_responses "$post" "$(fetch_post_context "$url")" 0
    elif test -n "$NO_RESPONSES"; then
        # render_post ignores the response data when $NO_RESPONSES is set, so
        # the extra network request for the context is skipped altogether.
        render_post "$post" "" 0
    else
        render_post "$post" "$(fetch_post_context "$url")" 0
    fi
)
# Print the help text to stdout. The program name shown is the basename of
# $0. Callers decide where the text goes (e.g. `usage 1>&2`) and whether to
# exit afterwards.
usage() (
    name="$(basename "$0")"
    # Unquoted here-document: $name is expanded, every literal dollar sign is
    # written as \$.
    cat <<EOF
usage: $name [-h] [-IRc] POST_URL
 $name [-h] [-IRcbBt] [-m MAX] USER_URL

$name does exactly what it says on the tin: It formats
a fediverse post (and its replies) into text. By default, into
simple-and-embeddable HTML.

 -c only print the responses (children) of a post
 -h print this message and exit
 -I display posts in reverse-chronological order
 -R do not recursively display posts' responses

 -b exclude reblogs/repeats from user feed
 -B exclude top-level replies from user feed
 -m output at maximum the specified amount of posts
 -t filter posts from user feed by tag

It works with posts from any server that supports Mastodon's API,
including Pleroma, Akkoma, Glitch, etc.

Notably, it supports post-attachments and custom-emoji. Keep in mind
that images are all fetched from remote sources. It is recommended,
if privacy or total archival is a concern, to use wget(1)'s --mirror
(or something like it) to fetch even these foreign files.

“Template” environment variables are used to generate the text
output. A template is a string that contains shell-style variable
names (\$NAME) that $name will replace.
They are \$POST_TEMPLATE, \$ATTACH_TEMPLATE, \$ATTACH_IMAGE_TEMPLATE,
and \$EMOJI_TEMPLATE.

The variables that can be used in \$POST_TEMPLATE are:
 \$ACCOUNT_ID, \$ACCOUNT_URL, \$ACCOUNT_NAME, \$ACCOUNT_AVATAR,
 \$POST_URL, \$POST_DATE, \$POST_CONTENT, \$POST_ATTACHMENTS,
 and \$POST_RESPONSES.

The variables that can be used in \$ATTACH_TEMPLATE and
\$ATTACH_IMAGE_TEMPLATE are:
 \$ATTACH_URL, \$ATTACH_NAME, \$ATTACH_TYPE, \$ATTACH_DESC,
 and \$ATTACH_PREVIEW.

The variables that can be used in \$EMOJI_TEMPLATE are:
 \$EMOJI_SHORTCODE and \$EMOJI_URL.

See the first few lines of fedi2html for the default (example)
template values; see the README for a more detailed description
of these variables' meanings.
EOF
)
# ---- Command-line handling -------------------------------------------------
# Defaults for the user-feed query; see fetch_user_posts.
TAG_FILTER=""
EXCLUDE_REPLIES="false"
EXCLUDE_REBLOGS="false"
MAX_POSTS=20

while getopts 'hcIRt:bm:B' arg; do
    case "$arg" in
    h)
        usage
        exit 0
        ;;
    c)
        NO_PARENT="1"
        ;;
    I)
        REVERSE_ORDER="1"
        ;;
    R)
        NO_RESPONSES="1"
        ;;
    t)
        TAG_FILTER="&tagged=$OPTARG"
        ;;
    b)
        EXCLUDE_REBLOGS="true"
        ;;
    m)
        # The value ends up in the request URL as limit=…; insist on digits.
        case "$OPTARG" in
        ''|*[!0-9]*)
            printf '%s: -m expects a whole number, not "%s"\n' \
                "$(basename "$0")" "$OPTARG" 1>&2
            exit 2
            ;;
        esac
        MAX_POSTS="$OPTARG"
        ;;
    B)
        EXCLUDE_REPLIES="true"
        ;;
    *)
        # Unknown option or missing option-argument (getopts has already
        # printed a diagnostic): show the help text and stop, instead of
        # carrying on as if nothing happened.
        usage 1>&2
        exit 2
        ;;
    esac
done
shift $((OPTIND-1))

# The one positional argument: URL of a post or of a user account.
URL="$1"
if test -z "$URL"; then
    usage 1>&2
    exit 2
fi

# A URL that yields a post ID is handled as a post; otherwise one that yields
# a user ID is handled as a user feed.
USER_ID="$(url_user_id "$URL")"
POST_ID="$(url_post_id "$URL")"
if test -n "$POST_ID"; then
    handle_post_url "$URL"
elif test -n "$USER_ID"; then
    handle_user_url "$URL"
else
    echo 'That URL is not recognized as a post or user URL.' 1>&2
    exit 3
fi