From 23335cba3e7e4283b240d670b3c987fac4017e4f Mon Sep 17 00:00:00 2001 From: thoralmighty Date: Wed, 26 Jul 2023 01:15:17 +0200 Subject: [PATCH] Removed stash script in favour of rewritten Python version (with help from ChatGPT) --- env-template.txt | 2 +- stash.sh | 42 ++++++++++-- stasher.py | 174 +++++++++++++++++++++++++++++++++++++++++++++++ 3 files changed, 212 insertions(+), 6 deletions(-) create mode 100644 stasher.py diff --git a/env-template.txt b/env-template.txt index b09f4a7..2ce35f9 100644 --- a/env-template.txt +++ b/env-template.txt @@ -1,6 +1,6 @@ STASH_IMPORT_DIR= STASH_API_KEY= -STASH_YTDLP_FORMAT= +STASH_YTDLP_FORMAT="%(title)s [%(id)s].%(ext)s" STASH_HOST= STASH_PORT= FANCTL_SERVO_PIN= diff --git a/stash.sh b/stash.sh index 48b8c8a..40a6792 100755 --- a/stash.sh +++ b/stash.sh @@ -13,31 +13,64 @@ source .env TARGET_DIR=$(readlink -m "$STASH_IMPORT_DIR/$(date +%Y%m)") mkdir -p $TARGET_DIR +update_stash() { + echo "Running scan for new items in Stash..." + curl -S -s -o /dev/null -X POST -H "ApiKey: $STASH_API_KEY" -H "Content-Type: application/json" --data '{ "query": "mutation { metadataScan (input:{useFileMetadata: false})}" }' $STASH_HOST:$STASH_PORT/graphql +} + +find_booru_artist() { + # TODO: Reduce html to just name + PAGE_URL="$1" + ARTIST_NAME=$(curl -s "$PAGE_URL" | xmllint --format --html -xpath "/html/body/div[1]/section/ul/li[1]/a/text()" - 2>/dev/null) + ARTIST_NAME=$(echo $ARTIST_NAME | tr -dc '[:alnum:]\n\r' | tr '[:upper:]' '[:lower:]' | tr ' ' '_') + echo ${ARTIST_NAME}__ +} + download_file() { FILE_URL="$1" extensions="(jpg|JPG|jpeg|JPEG|png|PNG|gif|GIF|mp4|MP4)" rgx_file="^.*\.$extensions$" rgx_filename="[A-Za-z0-9_]*.$extensions" - rgx_dbu='http(s?)://.*donmai.us.*/posts/' - if [[ $FILE_URL =~ $rgx_dbu ]]; then - FILE_URL=$(curl -s "$1" | grep -Eo "http(s?)://.*donmai.us.*/original/[A-Za-z0-9/_]*\.(jpg|jpeg|png|gif|mp4)" | grep '__' -m1) + rgx_booru='https?://[a-z.]+/(index\.php.*id=([0-9]+)|posts/([0-9]+))' + 
rgx_booru_v1='(https?://.*/original/([A-Za-z0-9/_]*\.(jpg|jpeg|png|gif|mp4))|https?://img[a-z0-9.]+\.[a-z]+\.com/(?:images|/samples)/.*/([_0-9a-z]*\.(jpg|jpeg|png|gif|mp4)))' + rgx_booru_v2='(https?://.*/original/([A-Za-z0-9/_]*\.(jpg|jpeg|png|gif|mp4))|https?://img[a-z0-9.]+\.[a-z]+\.com/images/([0-9a-z]+/)+([_0-9a-z]+\.(jpg|jpeg|png|gif|mp4)))' + + if [[ $FILE_URL =~ $rgx_booru ]]; then + ARTIST=$(find_booru_artist "$FILE_URL") + echo "Artist is: $ARTIST" + FILE_URL=$(curl -s "$FILE_URL" | grep -Eo "$rgx_booru_v2" -m1 | head -1) #| grep '__' -m1) + fi + + if [ -z "$FILE_URL" ]; then + return 1 fi if [[ $FILE_URL =~ $rgx_file ]]; then echo $STASH_PRINT_PREFIX $(echo $FILE_URL | grep -Eo "$rgx_filename") curl -sO "$FILE_URL" --output-dir "$2/" + return $? else echo $STASH_PRINT_PREFIX $FILE_URL yt-dlp $FILE_URL -o "$2/$3" + return $? fi } +if [ "$1" == "--update" ]; then + update_stash + exit $? +fi + rgx_url='^http(s?):\/\/[-a-zA-Z0-9@:%._\+~#=]{2,256}\.[a-z]{2,6}\b[-a-zA-Z0-9@:%_\+.~#?&\/\/=]*$' if [[ $1 =~ $rgx_url ]]; then # Download using yt-dlp download_file "$1" "$TARGET_DIR" "$STASH_YTDLP_FORMAT" + if [ $? -ne 0 ]; then + echo "Stopped" + exit 1 + fi else isFile=$(file -0 "$1" | cut -d $'\0' -f2) case "$isFile" in @@ -55,5 +88,4 @@ else fi # Update stash -echo "Updating Stash..." 
-curl -S -s -o /dev/null -X POST -H "ApiKey: $STASH_API_KEY" -H "Content-Type: application/json" --data '{ "query": "mutation { metadataScan (input:{useFileMetadata: false})}" }' $STASH_HOST:$STASH_PORT/graphql +update_stash diff --git a/stasher.py b/stasher.py new file mode 100644 index 0000000..140900b --- /dev/null +++ b/stasher.py @@ -0,0 +1,174 @@ +import os +import re +import sys +import requests +import subprocess +from lxml import html +from dotenv import load_dotenv + +import argparse + +# Load variables from .env file +load_dotenv() + +url_pattern = r'^http(s?):\/\/[-a-zA-Z0-9@:%._\+~#=]{2,256}\.[a-z]{2,6}\b[-a-zA-Z0-9@:%_\+.~#?&\/\/=]*$' + +STASH_IMPORT_DIR = os.getenv("STASH_IMPORT_DIR_TEMP") or os.getenv("STASH_IMPORT_DIR") +STASH_API_KEY = os.getenv("STASH_API_KEY") +STASH_HOST = os.getenv("STASH_HOST") +STASH_PORT = os.getenv("STASH_PORT") +STASH_YTDLP_FORMAT = os.getenv("STASH_YTDLP_FORMAT") +STASH_PRINT_PREFIX = os.getenv("STASH_PRINT_PREFIX") + +def find_booru_artist(page_url): + response = requests.get(page_url) + + if response.status_code != 200: + print(f"Error: Unable to fetch page from {page_url}") + return None + + # Parse the HTML content + tree = html.fromstring(response.content) + + # Extract the artist name using XPath + artist_name = tree.xpath("/html/body/div[1]/section/ul/li[1]/a/text()") or tree.xpath("/html/body/div[1]/div[2]/div/div/aside/section[2]/div/ul[1]/li/a[2]/text()") + if not artist_name: + print("Warning: Artist name not found on the page.") + return None + + # Clean up and format the artist name + artist_name = artist_name[0].strip() + artist_name = ''.join(c if c.isalnum() or c.isspace() else '_' for c in artist_name).lower().strip() + + return artist_name + +def update_stash(): + print("Running scan for new items in Stash...") + url = f"http://{STASH_HOST}:{STASH_PORT}/graphql" + headers = { + "ApiKey": STASH_API_KEY, + "Content-Type": "application/json", + } + data = '{"query": "mutation { metadataScan 
(input:{useFileMetadata: false})}" }' + try: + response = requests.post(url, headers=headers, data=data) + if response.ok: + print("Update successful!") + else: + print(f"Update failed with status code: {response.status_code}") + print(response.text) + exit(1) + except requests.exceptions.RequestException as e: + print(f"Update error: {e}") + exit(1) + +def download_file(file_url, download_dir, ytdlp_prefix): + extensions = "(jpg|JPG|jpeg|JPEG|png|PNG|gif|GIF|mp4|MP4)" + rgx_file = r"^.*\.{0}$".format(extensions) + rgx_filename = r"[A-Za-z0-9_]*\.{0}".format(extensions) + rgx_booru = r'https?://[a-z.]+/(index\.php.*id=([0-9]+)|posts/([0-9]+))' + rgx_booru_v1 = r'(https?://.*/original/([A-Za-z0-9/_]*\.{0})|https?://img[a-z0-9.]+\.[a-z]+\.com/(?:images|/samples)/.*/([_0-9a-z]*\.{0}))'.format(extensions) + rgx_booru_v2 = r'(https?://.*/original/([A-Za-z0-9/_]*\.{0})|https?://img[a-z0-9.]+\.[a-z]+\.com/images/([0-9a-z]+/)+([_0-9a-z]+\.{0}))'.format(extensions) + + artist = None + if re.match(rgx_booru, file_url): + artist = find_booru_artist(file_url) + print("Artist is:", artist) + file_url = re.search(rgx_booru_v2, requests.get(file_url).text).group(1).strip() + + if not file_url: + return False + + if re.match(rgx_file, file_url): + print(STASH_PRINT_PREFIX, file_url) + try: + response = requests.get(file_url, stream=True) + if response.status_code == 200: + filename = ((artist + "__") if artist else "") + re.search(rgx_filename, file_url).group(0) + download_path = os.path.join(download_dir, filename) + + exists = os.path.isfile(download_path) + + if exists and not any(arg in sys.argv for arg in ("--overwrite", "-o")): + print("Destination file already exists:", filename) + return False + + with open(download_path, 'wb') as f: + for chunk in response.iter_content(8192): + f.write(chunk) + print("Saved as:", filename) + return True + elif response.status_code == 403: + print("Error: HTTP 403 Forbidden - Access to the file is forbidden.") + return False + else: + print(f"Error: Failed to download the file. 
Status code: {response.status_code}") + return False + except requests.exceptions.RequestException as e: + print(f"Error: {e}") + return False + else: + print(STASH_PRINT_PREFIX, file_url) + download_path = os.path.join(download_dir, ytdlp_prefix) + command = ['yt-dlp', file_url, '-o', download_path] + try: + subprocess.run(command, check=True) + return True + except subprocess.CalledProcessError as e: + print(f"Failed to run yt-dlp command. Error: {e}") + return False + +def find_path_or_url_arg(): #chatgpt + global url_pattern + for arg in sys.argv[1:]: + # Check if the argument is a valid file path + if os.path.exists(arg): + return arg + + # Use regular expression to check if the argument matches a URL pattern + if re.match(url_pattern, arg): + return arg + + # If no path or URL argument is found, return None + return None + +if __name__ == "__main__": + parser = argparse.ArgumentParser(description="Download files or update stash.") + parser.add_argument("url_or_path", metavar="URL_or_path", nargs="?", help="URL or file path to download") + parser.add_argument("-o", "--overwrite", action="store_true", help="Overwrite existing files if present") + parser.add_argument("-u", "--update", action="store_true", help="Update stash") + parser.add_argument("-n", "--no-update", action="store_true", help="Do not update stash") + + args = parser.parse_args() + + if args.update and args.no_update: + print("Conflicting arguments: --update and --no-update cannot be used together.") + exit(1) + + if args.update: + update_stash() + exit(0) + + url_or_path = args.url_or_path + if url_or_path is None: + print("Valid URL or file path required") + exit(1) + elif re.match(url_pattern, url_or_path): + # Download using yt-dlp + if not download_file(url_or_path, STASH_IMPORT_DIR, STASH_YTDLP_FORMAT): + print("Stopped") + exit(1) + else: + is_file = subprocess.check_output(["file", "-0", url_or_path]).decode().split("\x00")[1] + if "text" in is_file: + # Download as multiple URLs from 
the provided source file + print(f"Reading list of {sum(1 for _ in open(url_or_path))} URL(s)") + with open(url_or_path) as source_file: + for url in source_file: + download_file(url.strip(), STASH_IMPORT_DIR, STASH_YTDLP_FORMAT) + else: + subprocess.run(["rsync", url_or_path, STASH_IMPORT_DIR], check=True) + + # Update stash + if not args.no_update: + update_stash()