#!/bin/bash
#
# Import media into a Stash library and trigger a metadata scan.
#
# Usage:
#   <script> --update         Only trigger a Stash metadata scan.
#   <script> <url>            Download a single URL, then scan.
#   <script> <text-file>      Download every URL listed in the file
#                             (one per line), then scan.
#   <script> <other-file>     Copy the file into the import directory
#                             with rsync, then scan.
#
# Configuration is read from a `.env` file in the current working directory;
# it may override any of the STASH_* defaults declared below.
#
# External tools used: curl, xmllint, yt-dlp, file, rsync, readlink (-m).

STASH_IMPORT_DIR=""
STASH_API_KEY=""
STASH_HOST=""
STASH_PORT=0
# yt-dlp output template, see https://github.com/yt-dlp/yt-dlp#output-template
STASH_YTDLP_FORMAT="%(title)s [%(id)s].%(ext)s"
STASH_PRINT_PREFIX="Get: "
STASH_URL_FIXERS=()

# shellcheck source=/dev/null
source .env

# Downloads are grouped into one directory per month (YYYYMM).
TARGET_DIR=$(readlink -m -- "$STASH_IMPORT_DIR/$(date +%Y%m)")
mkdir -p -- "$TARGET_DIR"

#######################################
# Print an error message to stderr and exit with status 1.
# Arguments: $* - message
#######################################
die() {
  printf '%s\n' "$*" >&2
  exit 1
}

#######################################
# Print a short usage summary to stderr.
#######################################
usage() {
  printf 'Usage: %s --update | <url> | <file>\n' "${0##*/}" >&2
}

#######################################
# Abort unless the import directory is configured and exists.
# Globals: STASH_IMPORT_DIR (read), TARGET_DIR (read)
#######################################
require_target_dir() {
  [[ -n "$STASH_IMPORT_DIR" ]] \
    || die "STASH_IMPORT_DIR is not set; configure it in .env"
  [[ -d "$TARGET_DIR" ]] \
    || die "Import directory '$TARGET_DIR' does not exist and could not be created"
}

#######################################
# Print "<prefix> <item>" on stdout.
# Trailing whitespace of the prefix is trimmed so that exactly one space
# separates prefix and item; an empty prefix prints the item alone.
# Globals:   STASH_PRINT_PREFIX (read)
# Arguments: $1 - text to print after the prefix
#######################################
print_item() {
  local prefix=$STASH_PRINT_PREFIX
  prefix="${prefix%"${prefix##*[![:space:]]}"}"
  if [[ -n "$prefix" ]]; then
    printf '%s %s\n' "$prefix" "$1"
  else
    printf '%s\n' "$1"
  fi
}

#######################################
# Ask the Stash server to scan its library for new items.
# Globals: STASH_API_KEY, STASH_HOST, STASH_PORT (read)
# Returns: exit status of curl
#######################################
update_stash() {
  echo "Running scan for new items in Stash..."
  curl -S -s -o /dev/null -X POST \
    -H "ApiKey: $STASH_API_KEY" \
    -H "Content-Type: application/json" \
    --data '{ "query": "mutation { metadataScan (input:{useFileMetadata: false})}" }' \
    "$STASH_HOST:$STASH_PORT/graphql"
}

#######################################
# Fetch a booru post page and print a normalised artist name.
# The name is taken from a fixed XPath in the page, reduced to
# alphanumerics, lower-cased, and its spaces replaced by underscores.
# Arguments: $1 - URL of the post page
# Outputs:   the normalised name on stdout (empty if nothing was found)
#######################################
find_booru_artist() {
  # TODO: Reduce html to just name
  local page_url=$1
  local raw_name

  # xmllint is noisy on real-world HTML; its diagnostics are discarded.
  raw_name=$(curl -s "$page_url" \
    | xmllint --format --html \
      -xpath "/html/body/div[1]/section/ul/li[1]/a/text()" - 2>/dev/null)

  # Whitespace must survive the character filter, otherwise the final
  # space -> underscore step has nothing left to convert.
  printf '%s' "$raw_name" \
    | tr -dc '[:alnum:][:space:]' \
    | tr -s '[:space:]' ' ' \
    | sed -e 's/^ //' -e 's/ $//' \
    | tr '[:upper:]' '[:lower:]' \
    | tr ' ' '_'
  printf '\n'
}

#######################################
# Download one URL into a directory.
# Booru post pages are first resolved to the URL of the original media file.
# Direct links to image/video files are fetched with curl; everything else
# is handed to yt-dlp.
# Arguments: $1 - URL to download
#            $2 - destination directory
#            $3 - yt-dlp output template (file name part)
# Returns:   1 if no downloadable URL could be determined, otherwise the
#            exit status of curl / yt-dlp
#######################################
download_file() {
  local file_url=${1:-}
  local dest_dir=${2:-}
  local ytdlp_template=${3:-}
  local artist
  local extensions='(jpg|JPG|jpeg|JPEG|png|PNG|gif|GIF|mp4|MP4)'
  local rgx_file="^.*\.${extensions}\$"
  local rgx_booru='https?://[a-z.]+/(index\.php.*id=([0-9]+)|posts/([0-9]+))'
  local rgx_booru_v2='(https?://.*/original/([A-Za-z0-9/_]*\.(jpg|jpeg|png|gif|mp4))|https?://img[a-z0-9.]+\.[a-z]+\.com/images/([0-9a-z]+/)+([_0-9a-z]+\.(jpg|jpeg|png|gif|mp4)))'

  if [[ $file_url =~ $rgx_booru ]]; then
    artist=$(find_booru_artist "$file_url")
    echo "Artist is: $artist"
    # Replace the page URL with the first media URL found in the page.
    file_url=$(curl -s "$file_url" | grep -Eo -m1 "$rgx_booru_v2" | head -1)
  fi

  if [[ -z "$file_url" ]]; then
    return 1
  fi

  if [[ $file_url =~ $rgx_file ]]; then
    # The URL ends in a known extension, so its last path segment is the
    # file name curl -O will write.
    print_item "${file_url##*/}"
    curl -sO "$file_url" --output-dir "$dest_dir/"
  else
    print_item "$file_url"
    yt-dlp -o "$dest_dir/$ytdlp_template" -- "$file_url"
  fi
}

#######################################
# Entry point: dispatch on the first command-line argument.
# Globals:   TARGET_DIR, STASH_YTDLP_FORMAT (read)
# Arguments: $1 - "--update", a URL, or a path to a local file
#######################################
main() {
  if (($# == 0)); then
    usage
    exit 2
  fi

  local input=$1
  local file_type url
  local rgx_url='^http(s?):\/\/[-a-zA-Z0-9@:%._\+~#=]{2,256}\.[a-z]{2,6}\b[-a-zA-Z0-9@:%_\+.~#?&\/\/=]*$'

  if [[ "$input" == "--update" ]]; then
    update_stash
    exit $?
  fi

  if [[ $input =~ $rgx_url ]]; then
    # Single URL.
    require_target_dir
    if ! download_file "$input" "$TARGET_DIR" "$STASH_YTDLP_FORMAT"; then
      echo "Stopped"
      exit 1
    fi
  else
    [[ -e "$input" ]] || die "Not a URL or an existing file: $input"
    require_target_dir

    file_type=$(file -b -- "$input")
    case "$file_type" in
      *text*)
        # Download as multiple URLs from the provided source file
        echo "Reading list of $(($(wc -l <"$input"))) URL(s)"
        # The list is read on fd 3 so that the download tools cannot
        # swallow the remaining lines from stdin. A final line without a
        # trailing newline is still processed.
        while read -r -u 3 url || [[ -n "$url" ]]; do
          url=${url%$'\r'} # tolerate CRLF line endings
          download_file "$url" "$TARGET_DIR" "$STASH_YTDLP_FORMAT"
        done 3<"$input"
        ;;
      *)
        rsync -- "$input" "$TARGET_DIR/"
        ;;
    esac
  fi

  # Update stash
  update_stash
}

main "$@"