fedi2html/fedi2html
2024-05-06 21:18:58 -05:00

459 lines
13 KiB
Bash
Executable File
Raw Permalink Blame History

This file contains ambiguous Unicode characters

This file contains Unicode characters that might be confused with other characters. If you think that this is intentional, you can safely ignore this warning. Use the Escape button to reveal them.

#!/bin/sh
#―――――――――――――――――――――――――――――――――――――――――――――――――――――――――――――――――――――――――――――――
# Name: fedi2html
# Desc: Render a post from the fediverse into simple HTML.
# Auth: Jaidyn Ann <jadedctrl@posteo.at>
# Lisc: GNU GPLv3
# Reqs: curl, jq
# Date: 2024-03
#―――――――――――――――――――――――――――――――――――――――――――――――――――――――――――――――――――――――――――――――
# Default template for one rendered post. It only applies when the caller has
# not already provided $POST_TEMPLATE. The text is kept literal here; render_post
# passes it to env_subst, which is where the $VARIABLES and the $(date ...)
# call get evaluated.
test -n "$POST_TEMPLATE" || POST_TEMPLATE='
<article class="comment">
<a class="user" href="$ACCOUNT_URL">
<img class="avatar" src="$ACCOUNT_AVATAR">
<strong class="username">$ACCOUNT_NAME</strong>
<em class="useraddress">$ACCOUNT_ID</em>
</a>
<a class="address" href="$POST_URL">
<time title="$POST_DATE">$(date --date=$POST_DATE)</time>
</a>
<section>
$POST_CONTENT
</section>
<div class="attachments">
$POST_ATTACHMENTS
</div>
<div class="responses">
$POST_RESPONSES
</div>
</article>
'
# Default template for a non-image attachment; media_attachments uses it for
# every attachment whose type is not "image". Only applies when the caller has
# not already provided $ATTACH_TEMPLATE.
test -n "$ATTACH_TEMPLATE" || ATTACH_TEMPLATE='
<div class="attachment">
<a href="$ATTACH_URL"><strong>$ATTACH_NAME</strong></a>
$ATTACH_DESC
</div>
'
# Default template for an image attachment; media_attachments uses it when the
# attachment type is "image". Only applies when the caller has not already
# provided $ATTACH_IMAGE_TEMPLATE. The embedded $(if ...) shortens the caption
# when the description is long.
# NOTE: the single quotes around the sed expression below close and reopen the
# surrounding shell string, so the stored template holds that expression
# without quotes.
test -n "$ATTACH_IMAGE_TEMPLATE" || ATTACH_IMAGE_TEMPLATE='
<figure class="attachment">
<a href="$ATTACH_URL">
<img src="$ATTACH_URL" alt="$ATTACH_DESC" title="$ATTACH_DESC">
</a>
<figcaption>
<a href="$ATTACH_URL"><b>$ATTACH_NAME</b></a><br>
$(if test $(echo "$ATTACH_DESC" | wc -c) -gt 52; then
echo $ATTACH_DESC | head -c 53 | sed 's%[[:space:]]*$%%'
echo …
else
echo $ATTACH_DESC
fi)
</figcaption>
</figure>
'
# Default template for one custom emoji; replace_emojis substitutes it for each
# :shortcode:. Only applies when the caller has not already provided
# $EMOJI_TEMPLATE.
test -n "$EMOJI_TEMPLATE" \
	|| EMOJI_TEMPLATE='<img class="emoji" src="$EMOJI_URL" alt="$EMOJI_SHORTCODE" title="$EMOJI_SHORTCODE">'
# Given a post's JSON, render it as HTML by filling in $POST_TEMPLATE.
# The most important part of the script!
# For a reblog, the account shown is the reblogged post's (.reblog.account);
# otherwise it is the post's own .account.
#   $1  JSON of the post
#   $2  response data, one JSON object per line (see context_to_responses)
#   $3  depth of this post in the reply tree; visible to templates as
#       $POST_TREE_LEVEL
# Responses are rendered recursively unless $NO_RESPONSES is set.
# render_post $post_data $context_data $tree_level
render_post() {
	local post_data="$1"
	local responses_data="$2"
	local POST_TREE_LEVEL="$3"

	# printf rather than echo: some shells' echo interprets the backslash
	# escapes that JSON is full of.
	local acct_data
	acct_data="$(printf '%s\n' "$post_data" | jq -r .reblog.account)"
	if test -z "$acct_data" || test "$acct_data" = "null"; then
		acct_data="$(printf '%s\n' "$post_data" | jq -r .account)"
	fi

	local ACCOUNT_URL ACCOUNT_ID ACCOUNT_NAME ACCOUNT_AVATAR
	ACCOUNT_URL="$(printf '%s\n' "$acct_data" | jq -r .url)"
	# NOTE(review): .fqn does not appear to be sent by every server; fall
	# back to .acct rather than printing "null" -- confirm against the API.
	ACCOUNT_ID="$(printf '%s\n' "$acct_data" | jq -r '.fqn // .acct')"
	ACCOUNT_NAME="$(printf '%s\n' "$acct_data" | jq -r .display_name \
		| replace_emojis "$acct_data")"
	ACCOUNT_AVATAR="$(printf '%s\n' "$acct_data" | jq -r .avatar)"

	local POST_URL POST_DATE POST_CONTENT POST_ATTACHMENTS
	POST_URL="$(printf '%s\n' "$post_data" | jq -r .url)"
	POST_DATE="$(printf '%s\n' "$post_data" | jq -r .created_at)"
	POST_CONTENT="$(printf '%s\n' "$post_data" | jq -r .content \
		| replace_emojis "$post_data")"
	POST_ATTACHMENTS="$(media_attachments "$post_data")"

	# Always local, so that neither an enclosing render_post call nor the
	# environment can leak a value into the template when -R is in effect.
	local POST_RESPONSES=""
	if test -z "$NO_RESPONSES"; then
		POST_RESPONSES="$(render_responses "$post_data" "$responses_data" "$POST_TREE_LEVEL")"
	fi

	env_subst "$POST_TEMPLATE"
}
# Render a post's responses one-by-one and recursively.
# Each branch of the response tree will be rendered completely before proceeding
# to the next.
#   $1  JSON of the parent post
#   $2  response data, one JSON object per line (see context_to_responses)
#   $3  tree level of the parent (defaults to 0); children are rendered one
#       level deeper
# render_responses $post_data $context_data $tree_level
render_responses() {
	local post_data="$1"
	local responses_data="$2"
	local level="${3:-0}"
	local id responses response

	id="$(printf '%s\n' "$post_data" | jq -r '.id')"
	# Compare the in_reply_to_id field itself. Matching the raw line with a
	# regex also selected posts that merely mention the ID somewhere after
	# that key (in the content, a URL, a longer ID, ...).
	responses="$(printf '%s\n' "$responses_data" \
		| jq -c --arg id "$id" 'select((.in_reply_to_id | tostring) == $id)')"

	local IFS="
"
	for response in $responses; do
		render_post "$response" "$responses_data" "$((level + 1))"
	done
}
# Accepts text over stdin; it will replace every :shortcode: of the custom
# emojis listed in a post's (or account's) JSON with HTML generated from
# $EMOJI_TEMPLATE, and print the result.
#   $1     JSON containing an .emojis array (entries with .url and .shortcode)
#   stdin  the text to process
# echo ":blobcat:" | replace_emojis $post_data
replace_emojis() {
	local post_data="$1"
	local emojis line pattern value script=""
	local EMOJI_URL EMOJI_SHORTCODE

	emojis="$(printf '%s\n' "$post_data" \
		| jq -r '.emojis[]|(.url + "\t" + .shortcode)')"

	local IFS="
"
	for line in $emojis; do
		EMOJI_URL="$(printf '%s\n' "$line" | awk -F'\t' '{print $1}')"
		EMOJI_SHORTCODE="$(printf '%s\n' "$line" | awk -F'\t' '{print $2}')"
		# stdin still holds the text to process; keep the template off it.
		value="$(env_subst "$EMOJI_TEMPLATE" < /dev/null)"

		# The shortcode goes into a regex and the rendered HTML into a sed
		# replacement, so neutralise what sed would treat specially: the
		# "%" delimiter, "&", backslashes and embedded newlines.
		pattern="$(printf '%s\n' "$EMOJI_SHORTCODE" \
			| sed 's/[][\\.*^$%]/\\&/g')"
		value="$(printf '%s\n' "$value" \
			| sed -e 's/[\\&%]/\\&/g' -e '$!s/$/\\/')"

		script="${script}s%:${pattern}:%${value}%g
"
	done

	# A single pass over stdin applies every substitution in order; with
	# no emojis the script is empty and the text passes through unchanged.
	sed -e "$script"
}
# Given a post's JSON data, print the HTML corresponding to its media
# attachments, if any. Prints nothing if there are none.
# Attachments of type "image" are rendered with $ATTACH_IMAGE_TEMPLATE, all
# others with $ATTACH_TEMPLATE. The templates can use $ATTACH_TYPE,
# $ATTACH_URL, $ATTACH_DESC, $ATTACH_PREVIEW and $ATTACH_NAME (the basename
# of the URL).
# NOTE(review): the values are inserted into the templates unescaped.
# media_attachments $post_data
media_attachments() {
	local post_data="$1"
	local attachments line
	local ATTACH_TYPE ATTACH_URL ATTACH_DESC ATTACH_PREVIEW ATTACH_NAME

	# One attachment per line, fields separated by tabs. Newlines and tabs
	# inside a field (multi-line descriptions are common) are flattened to
	# spaces first; otherwise one attachment would be read as several
	# broken records.
	attachments="$(printf '%s\n' "$post_data" | jq -r '
		def flat:
			(. // "") | tostring
			| split("\r") | join("")
			| split("\n") | join(" ")
			| split("\t") | join(" ");
		.media_attachments[]
		| [.type, .url, .description, .preview_url]
		| map(flat)
		| join("\t")
	')"

	local IFS="
"
	for line in $attachments; do
		ATTACH_TYPE="$(printf '%s\n' "$line" | awk -F'\t' '{print $1}')"
		ATTACH_URL="$(printf '%s\n' "$line" | awk -F'\t' '{print $2}')"
		ATTACH_DESC="$(printf '%s\n' "$line" | awk -F'\t' '{print $3}')"
		ATTACH_PREVIEW="$(printf '%s\n' "$line" | awk -F'\t' '{print $4}')"
		ATTACH_NAME="$(basename "$ATTACH_URL")"

		if test "$ATTACH_TYPE" = "image"; then
			env_subst "$ATTACH_IMAGE_TEMPLATE"
		else
			env_subst "$ATTACH_TEMPLATE"
		fi
	done
}
# Reads a post's context JSON from stdin and prints its response data: the
# .descendants array sorted by .created_at, passed through maybe_jq_reverse,
# then emitted as one compact JSON object per line.
# Errors of the first jq (extracting .descendants) are discarded.
# fetch_post_context $url | context_to_responses
context_to_responses() {
	jq '.descendants' 2> /dev/null |
		jq 'sort_by(.created_at)' |
		maybe_jq_reverse |
		jq -cr '.[]'
}
# Make a request to the /api/v1/statuses/:id/$request API endpoint of the
# server the post lives on, and print the response body.
#   $1  URL of the post
#   $2  optional sub-resource (e.g. "context"); when empty, the status itself
#       is requested
# Returns 1, without contacting the server, if no post ID can be extracted
# from the URL.
# statuses_api_request $post_url $request
statuses_api_request() {
	local post_url="$1"
	local api_request="$2"
	local id server

	# Without an ID the request would silently go to a meaningless endpoint.
	if ! id="$(url_post_id "$post_url")" || test -z "$id"; then
		printf '%s\n' "Could not find a post ID in URL: $post_url" 1>&2
		return 1
	fi
	server="$(url_server "$post_url")"

	curl --retry 3 --retry-delay 5 \
		--location --header 'Accept: application/json,application/activity+json' \
		"${server}/api/v1/statuses/${id}${api_request:+/$api_request}"
}
# Request the context-JSON of a post, by URL, and print it converted to
# response data (see context_to_responses).
# fetch_post_context $url
fetch_post_context() {
	statuses_api_request "$1" "context" | context_to_responses
}
# Given a post URL, request the post's JSON and print it.
# fetch_post $url
fetch_post() {
	statuses_api_request "$1"
}
# Given a user-account URL, request the account's posts and print them as one
# compact JSON object per line.
# The query is shaped by $EXCLUDE_REBLOGS, $EXCLUDE_REPLIES, $MAX_POSTS and
# $TAG_FILTER.
# fetch_user_posts $url
fetch_user_posts() {
	local url="$1"
	local endpoint query

	endpoint="$(url_server "$url")/api/v1/accounts/$(url_user_id "$url")/statuses"
	query="exclude_reblogs=${EXCLUDE_REBLOGS}&limit=${MAX_POSTS}"
	query="${query}&exclude_replies=${EXCLUDE_REPLIES}${TAG_FILTER}"

	curl --retry 3 --retry-delay 5 \
		--location --header 'Accept: application/json,application/activity+json' \
		"${endpoint}?${query}" |
		jq -c '.[]'
}
# Print the ID of a user, based on its URL.
# Returns 1 (and prints nothing) if the URL is not recognized as a user URL.
# Trailing slashes are not part of the ID.
# url_user_id $url
url_user_id() {
	local url="$1"

	# Pleroma-style URLs: https://jam.xwx.moe/users/Tirifto
	# Mastodon-style URLs: https://esperanto.masto.host/@jubiloEO
	if printf '%s\n' "$url" | grep -q "/users/"; then
		printf '%s\n' "$url" \
			| sed -e 's%.*/users/%%' -e 's%/*$%%'
	# Usernames are not purely alphanumeric: "_" is common, and "." / "-"
	# / "@" occur as well.
	elif printf '%s\n' "$url" | grep -Eq "/@[[:alnum:]_.@-]+/*$"; then
		printf '%s\n' "$url" \
			| sed -e 's%.*/@%%' -e 's%/*$%%'
	else
		return 1
	fi
}
# Print the ID of a post, based on its URL.
# Returns 1 (and prints nothing) if the URL is not recognized as a post URL.
# Anything following the ID (a trailing slash, further path, query string) is
# dropped.
# url_post_id $url
url_post_id() {
	local url="$1"

	# Pleroma-style URLs: https://jam.xwx.moe/notice/Ac6PIZAP0ZzkMTYBBg
	# Mastodon-style URLs: https://esperanto.masto.host/@minjo/111461250815264185
	if printf '%s\n' "$url" | grep -q "/notice/"; then
		printf '%s\n' "$url" \
			| sed -e 's%.*/notice/%%' -e 's%[/?#].*$%%'
	# Usernames are not purely alphanumeric: "_" is common, and "." / "-"
	# / "@" occur as well.
	elif printf '%s\n' "$url" | grep -Eq "/@[[:alnum:]_.@-]+/[[:digit:]]+"; then
		printf '%s\n' "$url" \
			| sed 's%.*/@[[:alnum:]_.@-]*/\([[:digit:]][[:digit:]]*\).*%\1%'
	else
		return 1
	fi
}
# Print the server (including protocol, if the URL has one) of a post or user,
# based on its URL: everything up to the first "/", "?" or "#" that follows
# the optional leading "scheme://".
# url_server $url
# url_server https://jam.xwx.moe/notice/Ac6PIZAP0ZzkMTYBBg → https://jam.xwx.moe
url_server() {
	local url="$1"
	local scheme=""
	local rest="$url"

	case "$url" in
		*://*)
			scheme="${url%%://*}"
			case "$scheme" in
				# The first "://" belongs to something later in the
				# URL (e.g. a query parameter), not to a scheme.
				*[!A-Za-z0-9+.-]*)
					scheme=""
					;;
				*)
					rest="${url#*://}"
					scheme="${scheme}://"
					;;
			esac
			;;
	esac

	# URL-derived text is only ever passed as an argument, never as the
	# printf format.
	printf '%s%s\n' "$scheme" "${rest%%[/?#]*}"
}
# Sanitize a template-string.
# AKA, escape quotation-marks, so that the template can be placed inside the
# double-quoted string that env_subst evaluates.
# Prints the escaped template.
# prep_template $template
prep_template() {
	local template="$1"

	# printf rather than echo: echo would swallow a template such as "-n",
	# and some shells' echo interprets backslash escapes.
	printf '%s\n' "$template" \
		| sed 's/"/\\"/g'
}
# Rough replacement for gettext's envsubst.
# Prints the template with its shell variables (and command substitutions)
# expanded. Probably not good for general use: the template itself is handed
# to eval, so it must come from a trusted source. Substitution only happens
# along the "first level", though: the variables are replaced, but their
# contents (AKA, post contents) aren't evaluated in turn.
# env_subst $template
# env_subst 'Shell: $SHELL' → "Shell: /bin/sh"
env_subst() {
	local template="$1"

	# printf rather than echo, so that output starting with "-n" or
	# containing backslashes is printed as-is by every shell.
	eval "printf '%s\\n' \"$(prep_template "$template")\""
}
# Reads a JSON array from stdin and prints it again; if $REVERSE_ORDER is set
# (the -I flag), the array is reversed with jq first.
# This is used to enable/disable reverse-chronological order of posts.
# fetch_context $url | jq '.descendants' | maybe_jq_reverse
maybe_jq_reverse() {
	local json
	json="$(cat)"

	case "$REVERSE_ORDER" in
		"")
			echo "$json"
			;;
		*)
			echo "$json" | jq 'reverse'
			;;
	esac
}
# Render a user's posts, one-by-one, taking into account cli arguments:
#   $NO_RESPONSES set  render each post without its responses
#   $NO_PARENT set     render only the responses of each post
#   neither            render each post followed by its responses
# Nothing is written to disk.
# handle_user_url https://jam.xwx.moe/users/tirifto
handle_user_url() {
	local url="$1"
	local user_posts post post_url context

	user_posts="$(fetch_user_posts "$url")"

	local IFS="
"
	for post in $user_posts; do
		if test -n "$NO_RESPONSES"; then
			render_post "$post" "" 0
			continue
		fi

		# The post's own URL is only needed to look up its context.
		post_url="$(printf '%s\n' "$post" | jq -r '.url')"
		context="$(fetch_post_context "$post_url")"
		if test -n "$NO_PARENT"; then
			render_responses "$post" "$context" 0
		else
			render_post "$post" "$context" 0
		fi
	done
}
# Fetch a post and its context by URL, then render either the post itself
# (which includes its responses) or, when $NO_PARENT is set, only its
# responses.
# handle_post_url $url
handle_post_url() {
	local url="$1"
	local post context renderer

	post="$(fetch_post "$url")"
	context="$(fetch_post_context "$url")"

	renderer="render_post"
	test -z "$NO_PARENT" || renderer="render_responses"
	"$renderer" "$post" "$context" 0
}
# Print the help text to stdout. Callers redirect it to stderr when it is
# shown because of a usage error.
usage() {
	local self
	self="$(basename "$0")"

	# Unquoted here-document: $self is expanded, every literal dollar sign
	# is written as \$.
	cat <<EOF
usage: $self [-h] [-IRc] POST_URL
 $self [-h] [-IRcbB] [-t TAG] [-m MAX] USER_URL

$self does exactly what it says on the tin: It formats
a fediverse post (and its replies) into text. By default, into
simple-and-embeddable HTML.

 -c only print the responses (children) of a post
 -h print this message and exit
 -I display posts in reverse-chronological order
 -R do not recursively display a post's responses

 -b exclude reblogs/repeats from user feed
 -B exclude top-level replies from user feed
 -m output at maximum the specified amount of posts
 -t filter posts from user feed by tag

It works with posts from any server that supports Mastodon's API,
including Pleroma, Akkoma, Glitch, etc.

Notably, it supports post-attachments and custom-emoji. Keep in mind
that images are all fetched from remote sources. It is recommended,
if privacy or total archival is a concern, to use wget(1)'s --mirror
(or something like it) to fetch even these foreign files.

“Template” environment variables are used to generate the text
output. A template is a string that contains shell-style variable
names (\$NAME) that $self will replace.
They are \$POST_TEMPLATE, \$ATTACH_TEMPLATE, \$ATTACH_IMAGE_TEMPLATE,
and \$EMOJI_TEMPLATE.

The variables that can be used in \$POST_TEMPLATE are:
 \$ACCOUNT_ID, \$ACCOUNT_URL, \$ACCOUNT_NAME, \$ACCOUNT_AVATAR,
 \$POST_URL, \$POST_DATE, \$POST_CONTENT, \$POST_ATTACHMENTS,
 and \$POST_RESPONSES.

The variables that can be used in \$ATTACH_TEMPLATE and
\$ATTACH_IMAGE_TEMPLATE are:
 \$ATTACH_URL, \$ATTACH_NAME, \$ATTACH_TYPE, \$ATTACH_DESC,
 and \$ATTACH_PREVIEW.

The variables that can be used in \$EMOJI_TEMPLATE are:
 \$EMOJI_SHORTCODE and \$EMOJI_URL.

See the first few lines of fedi2html for the default (example)
template values; see the README for a more detailed description
of these variables' meanings.
EOF
}
# Defaults for the user-feed options; -t, -B, -b and -m override them.
TAG_FILTER=""
EXCLUDE_REPLIES="false"
EXCLUDE_REBLOGS="false"
MAX_POSTS=20

while getopts 'hcIRt:bm:B' arg; do
	case "$arg" in
		h)
			usage
			exit 0
			;;
		c)
			NO_PARENT="1"
			;;
		I)
			REVERSE_ORDER="1"
			;;
		R)
			NO_RESPONSES="1"
			;;
		t)
			TAG_FILTER="&tagged=$OPTARG"
			;;
		b)
			EXCLUDE_REBLOGS="true"
			;;
		m)
			# The value ends up in the request URL as the limit.
			case "$OPTARG" in
				"" | *[!0-9]*)
					printf '%s\n' "$(basename "$0"): -m expects a number, not '$OPTARG'" 1>&2
					exit 2
					;;
			esac
			MAX_POSTS="$OPTARG"
			;;
		B)
			EXCLUDE_REPLIES="true"
			;;
		*)
			# Unknown option or missing argument; getopts has already
			# printed its own diagnostic.
			usage 1>&2
			exit 2
			;;
	esac
done
shift $((OPTIND-1))

URL="$1"
if test -z "$URL"; then
	usage 1>&2
	exit 2
fi

# Decide what kind of URL was given; a post URL takes priority over a user URL.
USER_ID="$(url_user_id "$URL")"
POST_ID="$(url_post_id "$URL")"
if test -n "$POST_ID"; then
	handle_post_url "$URL"
elif test -n "$USER_ID"; then
	handle_user_url "$URL"
else
	echo 'That URL is not recognized as a post or user URL.' 1>&2
	exit 3
fi