divercities_dl/divercities_dl
2020-10-22 16:23:49 -05:00

305 lines
8.2 KiB
Bash

#!/bin/sh
# --------------------------------------
# name: divercities_dl
# main: jaidedctrl
# lisc: cc0
# date: 2020
# requires: `curl`, `jq`, `lynx`, and `ffmpeg`
# --------------------------------------
# ==============================================================================
# FETCHING
# ==============================================================================
# url referal -> data
# fetch data from a given url, using divercities auth cookie
fetch_page() {
  # Request a URL with curl, sending browser-like headers plus the
  # Divercities session cookie. The response body goes to stdout (callers
  # redirect it into files); the return status is curl's.
  #   $1  url to request
  #   $2  album id appended to the Referer header (callers may omit it)
  # Reads the global $DC_COOKIE for the _divercities_session cookie value.
  local url="$1"
  local referal="$2"
  # Every continuation backslash is preceded by a space, so two adjacent
  # arguments can never be glued into one word, whatever the indentation
  # of the following line happens to be.
  curl "$url" \
    --progress-bar \
    -L \
    -H 'Accept: audio/webm,audio/ogg,audio/wav,audio/*;q=0.9,application/ogg;q=0.7,video/*;q=0.6,*/*;q=0.5' \
    -H 'Accept-Language: en-US,en;q=0.5' \
    --compressed \
    -H 'Range: bytes=0-' \
    -H "Referer: https://music.divercities.eu/albums/$referal" \
    -H 'Connection: keep-alive' \
    -H 'DNT: 1' \
    -H "Cookie: _divercities_session=$DC_COOKIE" \
    -H 'User-Agent: Mozilla/5.0 (Windows NT 6.1; rv:80.0) Gecko/20100101 Firefox/80.0'
}
# album_id -> html
# fetch an album's page
fetch_album() {
  # Print the page of the album whose id is $1, as returned by fetch_page.
  # Only the url is handed on; no referer id is passed.
  fetch_page "https://music.divercities.eu/albums/${1}"
}
# artist_id -> html
# fetch an artist's page
fetch_artist() {
  # Print the page of the artist whose id is $1, as returned by fetch_page.
  # Only the url is handed on; no referer id is passed.
  fetch_page "https://music.divercities.eu/artists/${1}"
}
# ==============================================================================
# PARSE
# ==============================================================================
# |artist_html -> numbers
# take a list of album IDs from an artist's page
artist_album_ids() {
  # Filter: reads an artist page on stdin and prints one album id per line.
  # A line is kept only when it contains both "album-cover " and "id:";
  # everything up to the last "id: " and everything from the first "})"
  # onwards is then cut away.
  sed -n \
    -e '/album-cover /!d' \
    -e '/id:/!d' \
    -e 's%.*id: %%' \
    -e 's%}).*%%' \
    -e 'p'
}
# |album_html -> album_track_spans
# gets all track spans (containing garbled track json) from an album's page
album_track_spans() {
  # Filter: reads an album page on stdin and prints every line mentioning
  # 'track-title', except those containing the literal text
  # 'span class="track-title"'.
  sed -n \
    -e '/span class="track-title"/d' \
    -e '/track-title/p'
}
# |album_track_span -> track_json
# takes a track span ripped from an album's page, turns it into json
track_span_to_json() {
  # Filter: turns one track span (stdin) into a single line of json (stdout).
  #  1. drop everything up to and including the last ' {"id":'
  #  2. render the remainder through lynx (presumably to decode the HTML
  #     escaping of the embedded json -- confirm against a real page)
  #  3. join the rendered lines, delete every space character, and put the
  #     '{"id":' prefix back in front
  # NOTE(review): step 3 removes ALL spaces, including those inside json
  # string values such as titles -- confirm this is intended.
  sed -e 's%.* {"id":%%' \
    | lynx -dump -stdin -width=10000 -assume_charset=UTF-8 -display_charset=UTF-8 \
    | tr -d '\n' \
    | sed -e 's% %%g' -e 's%^%{"id":%'
}
# |track_json -> string
# takes the track json, and returns a pretty, comma-delimited list of its artists
track_json_artists() {
  # Filter: reads track json on stdin and prints the names found under
  # .artists[].name as one "a, b, c" string (no trailing newline).
  # jq's error output is discarded, so unparsable input yields nothing.
  jq -r '.artists[].name' 2>/dev/null \
    | tr '\n' ',' \
    | sed -e 's%,%, %g' -e 's%, $%%'
}
# ==============================================================================
# DOWNLOAD
# ==============================================================================
# artist_id -> nil
# download all albums from an artist
dl_artist() {
  # Download every album listed on an artist's page.
  #   $1  artist id
  # Returns 1 (with a message on stderr) when no album id could be
  # extracted from the page; otherwise the status of the last dl_album call.
  local artist_id="$1"
  local albums album
  albums="$(fetch_artist "$artist_id" | artist_album_ids)"
  if test -z "$albums"; then
    echo "divercities_dl: no albums found for artist '$artist_id'" >&2
    return 1
  fi
  # $albums is left unquoted on purpose: one id per whitespace-separated word.
  # A for loop (rather than piping into `while read`) keeps stdin untouched
  # for the commands run by dl_album.
  for album in $albums; do
    dl_album "$album"
  done
}
# album_id -> nil
# download every track of a given album
dl_album() {
  # Download every track of one album.
  #   $1  album id
  # Reads the global $BITRATE and hands it to dl_track_from_json.
  # The track spans of the album page are stored in one temp file; each span
  # is converted to json in a second temp file, which is then passed to
  # dl_track_from_json. Both temp files are removed at the end.
  local album_id="$1"
  local tempA tempB line
  tempA="$(mktemp --suffix=divercities)" || return 1
  tempB="$(mktemp --suffix=divercities)" || { rm -f "$tempA"; return 1; }
  fetch_album "$album_id" \
    | album_track_spans \
    > "$tempA"
  while IFS= read -r line; do
    printf '%s\n' "$line" \
      | track_span_to_json \
      > "$tempB"
    # Inside this loop stdin is the span list itself. Give the download
    # chain /dev/null instead, so that a command which reads stdin (ffmpeg
    # does) cannot swallow the tracks that are still waiting to be read.
    dl_track_from_json "$tempB" "$BITRATE" < /dev/null
  done < "$tempA"
  rm -f "$tempA" "$tempB"
}
# track_json_file "low"/"high" -> nil
# downloads a track, using the track json as source for metadata and URL
dl_track_from_json() {
  # Download one track described by a json file, then tag it.
  #   $1  path of the track json file
  #   $2  "low" or "high"; selects the .<bitrate>_bitrate_url field
  # Reads the global $SEMANTIC: when it is 0 the files are placed in
  # ./<first artist>/<album>/, otherwise in the current directory.
  # Writes <title>.mp3, <title>.json and (if absent) cover.jpg there, then
  # calls track_metadata. A track whose .mp3 already exists is skipped.
  # Returns 1 when the json has no title or url, when the target directory
  # cannot be created, or when the download fails.
  local json="$1"; local bitrate="$2"
  local title album artists url_stub cover_url
  # `// empty` makes jq print nothing (instead of the word "null") for a
  # missing field, so absence can be detected with a plain emptiness test.
  title="$(jq -r '.title // empty' < "$json" 2>/dev/null)"
  album="$(jq -r '.album.title // empty' < "$json" 2>/dev/null)"
  # only the first name of the comma-separated artist list is used
  artists="$(track_json_artists < "$json" | awk -F , '{print $1}')"
  url_stub="$(jq -r ".${bitrate}_bitrate_url // empty" < "$json" 2>/dev/null)"
  cover_url="$(jq -r '.cover.large_url // empty' < "$json" 2>/dev/null)"

  if test -z "$title" || test -z "$url_stub"; then
    echo "divercities_dl: no title or ${bitrate} bitrate url in '$json', skipping" >&2
    return 1
  fi
  local url="https://music.divercities.eu${url_stub}"

  # Names used on disk: a '/' inside a title/album/artist would otherwise be
  # taken as a directory separator. The untouched values are still what gets
  # written into the tags.
  local title_fs album_fs artists_fs
  title_fs="$(printf '%s' "$title" | tr '/' '_')"
  album_fs="$(printf '%s' "${album:-unknown}" | tr '/' '_')"
  artists_fs="$(printf '%s' "${artists:-unknown}" | tr '/' '_')"

  local dir="."
  if test "${SEMANTIC:-0}" -eq 0; then
    dir="${artists_fs}/${album_fs}"
    mkdir -p -- "$dir" || return 1
  fi
  local output="${dir}/${title_fs}"

  if test -e "${output}.mp3"; then
    return 0
  fi
  if test ! -e "${dir}/cover.jpg" && test -n "$cover_url"; then
    # a failed cover download must not leave a file behind, or it would
    # never be retried
    fetch_page "$cover_url" > "${dir}/cover.jpg" || rm -f "${dir}/cover.jpg"
  fi
  # explicit '.' filter: older jq releases refuse to run without a program
  jq '.' < "$json" 2>/dev/null \
    > "${output}.json"
  if ! fetch_page "$url" > "${output}.mp3"; then
    echo "divercities_dl: download of '$title' failed" >&2
    # remove the partial file so the next run does not skip this track
    rm -f "${output}.mp3"
    return 1
  fi
  track_metadata "${output}" "$title" "$album" "$artists"
}
# ==============================================================================
# METADATA
# ==============================================================================
# output, title, album, artist -> nil
# handle a track's metadata (updating, realigning, etc.)
track_metadata() {
  # Fill in the tags of a freshly downloaded track.
  #   $1  path of the track without extension ($1.mp3 and $1.json must exist)
  #   $2  title   $3  album   $4  artist
  # ffmpeg's report about the mp3 is captured in a temp file; the metadata_*
  # helpers consult that report and rewrite a working copy of the mp3, which
  # finally replaces the original. All temp files are removed afterwards.
  local mp3="${1}.mp3"; local json="${1}.json"
  local title="$2"; local album="$3"; local artists="$4"
  local track_no temp temp_mp3_a temp_mp3_b
  track_no="$(jq -r '.track_number' < "$json")"
  temp="$(mktemp --suffix=divercities)" || return 1
  temp_mp3_a="$(mktemp --suffix='divercities.mp3')" \
    || { rm -f "$temp"; return 1; }
  temp_mp3_b="$(mktemp --suffix='divercities.mp3')" \
    || { rm -f "$temp" "$temp_mp3_a"; return 1; }
  # With an input but no output ffmpeg only reports on the file; that report
  # arrives on stderr. -nostdin stops ffmpeg from reading the caller's stdin.
  ffmpeg -nostdin -i "$mp3" 2> "$temp"
  cp "$mp3" "$temp_mp3_a"
  metadata_title "$temp" "$temp_mp3_a" "$temp_mp3_b" "$title"
  metadata_album "$temp" "$temp_mp3_a" "$temp_mp3_b" "$album"
  metadata_artist "$temp" "$temp_mp3_a" "$temp_mp3_b" "$artists"
  metadata_date "$temp" "$temp_mp3_a" "$temp_mp3_b"
  metadata_track "$temp" "$temp_mp3_a" "$temp_mp3_b" "$track_no"
  cp "$temp_mp3_a" "$mp3"
  # -f: the helpers delete the scratch file themselves and may not recreate
  # it; the report file has to go as well.
  rm -f "$temp" "$temp_mp3_a" "$temp_mp3_b"
}
# handle title metadata; copy over if exists, otherwise create
metadata_title() {
  # Add a title tag unless ffmpeg's report already shows one.
  #   $1  file holding ffmpeg's report about the track
  #   $2  working mp3, updated in place when a tag is added
  #   $3  scratch mp3 path used as ffmpeg's output
  #   $4  title to set
  local data="$1"
  local mp3_a="$2"; local mp3_b="$3"
  local title="$4"
  # start without a scratch file (presumably so ffmpeg never has to ask
  # about overwriting); -f because it may already be gone
  rm -f "$mp3_b"
  # Only a "title :" key at the start of a line counts as an existing tag;
  # a bare search for "title" would also hit file names like "Untitled.mp3".
  if grep -Eiq '^[[:space:]]*title[[:space:]]*:' "$data"; then
    return 0
  fi
  # copy back only if ffmpeg succeeded
  ffmpeg -nostdin -i "$mp3_a" -codec copy -metadata title="$title" "$mp3_b" \
    && cp "$mp3_b" "$mp3_a"
}
# handle album metadata; copy over if exists, otherwise create
metadata_album() {
  # Add an album tag unless ffmpeg's report already shows one.
  #   $1  file holding ffmpeg's report about the track
  #   $2  working mp3, updated in place when a tag is added
  #   $3  scratch mp3 path used as ffmpeg's output
  #   $4  album name to set
  local data="$1"
  local mp3_a="$2"; local mp3_b="$3"
  local album="$4"
  # start without a scratch file (presumably so ffmpeg never has to ask
  # about overwriting); -f because it may already be gone
  rm -f "$mp3_b"
  # Only an "album :" key at the start of a line counts; a bare search for
  # "album" would also be satisfied by an "album_artist" tag or a file path.
  if grep -Eiq '^[[:space:]]*album[[:space:]]*:' "$data"; then
    return 0
  fi
  # copy back only if ffmpeg succeeded
  ffmpeg -nostdin -i "$mp3_a" -codec copy -metadata album="$album" "$mp3_b" \
    && cp "$mp3_b" "$mp3_a"
}
# handle artist metadata; copy over if exists, otherwise create
metadata_artist() {
  # Add an artist tag unless ffmpeg's report already shows one.
  #   $1  file holding ffmpeg's report about the track
  #   $2  working mp3, updated in place when a tag is added
  #   $3  scratch mp3 path used as ffmpeg's output
  #   $4  artist name to set
  local data="$1"
  local mp3_a="$2"; local mp3_b="$3"
  local artist="$4"
  # start without a scratch file (presumably so ffmpeg never has to ask
  # about overwriting); -f because it may already be gone
  rm -f "$mp3_b"
  # if grep "TOPE" "$data" >/dev/null; then
  # artist="$(grep "TOPE" "$data" | awk '{print $3}')"
  # fi
  # Only an "artist :" key at the start of a line counts; a bare search for
  # "artist" would also be satisfied by an "album_artist" tag or a file path.
  if grep -Eiq '^[[:space:]]*artist[[:space:]]*:' "$data"; then
    return 0
  fi
  # copy back only if ffmpeg succeeded
  ffmpeg -nostdin -i "$mp3_a" -codec copy -metadata artist="$artist" "$mp3_b" \
    && cp "$mp3_b" "$mp3_a"
}
# handle date metadata; copy over if exists
metadata_date() {
  # Copy the value of a TORY entry in ffmpeg's report into the date tag.
  #   $1  file holding ffmpeg's report about the track
  #   $2  working mp3, updated in place when a tag is added
  #   $3  scratch mp3 path used as ffmpeg's output
  # Does nothing when the report has no TORY line or the line has no value.
  local data="$1"
  local mp3_a="$2"; local mp3_b="$3"
  # local, so the value does not leak into the rest of the script
  local date
  # start without a scratch file (presumably so ffmpeg never has to ask
  # about overwriting); -f because it may already be gone
  rm -f "$mp3_b"
  # third whitespace-separated field of the first TORY line only, so that
  # several matching lines cannot produce a multi-line tag value
  date="$(awk '/TORY/ { print $3; exit }' "$data")"
  if test -n "$date"; then
    # copy back only if ffmpeg succeeded
    ffmpeg -nostdin -i "$mp3_a" -codec copy -metadata date="$date" "$mp3_b" \
      && cp "$mp3_b" "$mp3_a"
  fi
}
# handle track metadata; add it if nonexistent
metadata_track() {
  # Add a track-number tag unless ffmpeg's report already shows one.
  #   $1  file holding ffmpeg's report about the track
  #   $2  working mp3, updated in place when a tag is added
  #   $3  scratch mp3 path used as ffmpeg's output
  #   $4  track number to set
  local data="$1"
  local mp3_a="$2"; local mp3_b="$3"
  local track_no="$4"
  # start without a scratch file (presumably so ffmpeg never has to ask
  # about overwriting); -f because it may already be gone
  rm -f "$mp3_b"
  # Only a "track :" key at the start of a line counts; a bare search for
  # "track" would also hit file paths such as ".../Soundtrack/...".
  if grep -Eiq '^[[:space:]]*track[[:space:]]*:' "$data"; then
    return 0
  fi
  # copy back only if ffmpeg succeeded
  ffmpeg -nostdin -i "$mp3_a" -codec copy -metadata track="$track_no" "$mp3_b" \
    && cp "$mp3_b" "$mp3_a"
}
# ==============================================================================
# INVOCATION
# ==============================================================================
# Defaults, overridden by the command-line options parsed further down.
# SEMANTIC: 0 puts tracks under ./artist/album/; -s sets it to 1.
SEMANTIC=0
# BITRATE: "high" or "low" (-l); selects which *_bitrate_url is downloaded.
BITRATE=high
# ALBUM / GROUP: the url or id given with -a / -g; empty when not given.
ALBUM=
GROUP=
# print usage and seppuku
usage() {
  # Print the help text on stdout, then terminate the script.
  #   $1  exit status to terminate with
  printf '%s\n' \
    'usage: divercities_dl [-hsl] -a album_url/id' \
    ' divercities_dl [-hsl] -g artist_url/id' \
    ' -a download the album of the given url/id' \
    ' -g download albums of artist at given url/id' \
    " -s don't put tracks in semantic path (./artist/album/track.mp3)" \
    ' -l use low bitrate downloads' \
    ' -h print this message' \
    '' \
    'To use this script, you need to first log into Divercities-- then, take the value of' \
    'the _divercities_session cookie in your browser, and put it into the env variable' \
    '$DC_COOKIE.'
  exit "$1"
}
# --------------------------------------
# Parse the command line. Only the documented options are accepted; anything
# else (getopts reports it as '?') prints the usage text and exits with 2
# instead of being silently ignored.
while getopts 'lshg:a:' c; do
  case "$c" in
    h) usage 1 ;;
    l) BITRATE="low" ;;
    s) SEMANTIC=1 ;;
    a) ALBUM="$OPTARG" ;;
    g) GROUP="$OPTARG" ;;
    *) usage 2 ;;
  esac
done
# Without the session cookie no request can be authenticated.
if test -z "${DC_COOKIE:-}"; then
  echo "Env variable \$DC_COOKIE not set!"
  echo
  usage 3
fi
# -a / -g accept either a bare id or a url. Reduce the value to its last
# path component: drop any ?query or #fragment, then any trailing slashes,
# then everything up to the last remaining slash. A value that reduces to
# nothing (e.g. "/") is a usage error.
if test -n "$ALBUM"; then
  ALBUM="${ALBUM%%[?#]*}"
  while test "${ALBUM%/}" != "$ALBUM"; do
    ALBUM="${ALBUM%/}"
  done
  ALBUM="${ALBUM##*/}"
  test -n "$ALBUM" || usage 2
  dl_album "$ALBUM"
elif test -n "$GROUP"; then
  GROUP="${GROUP%%[?#]*}"
  while test "${GROUP%/}" != "$GROUP"; do
    GROUP="${GROUP%/}"
  done
  GROUP="${GROUP##*/}"
  test -n "$GROUP" || usage 2
  dl_artist "$GROUP"
else
  usage 2
fi