Removed stash script in favour of rewritten Python version (with help from ChatGPT)

2023-07-26 01:15:17 +02:00 · 2023-07-26 01:15:17 +02:00 · e5245c0d9b
commit e5245c0d9b
parent c641bd35e7
4 changed files with 180 additions and 60 deletions
--- a/env-template.txt
+++ b/env-template.txt
@ -1,6 +1,6 @@
 STASH_IMPORT_DIR=
 STASH_API_KEY=
-STASH_YTDLP_FORMAT=
+STASH_YTDLP_FORMAT="%(title)s [%(id)s].%(ext)s"
 STASH_HOST=
 STASH_PORT=
 FANCTL_SERVO_PIN=
--- a/setup.sh
+++ b/setup.sh
@ -9,5 +9,10 @@ elif [ ! -z "$1" ]; then
 fi

 # Installs prerequisites for some of the system scripts
+# Redundant package names are acceptable as this improves readability

+# General
 python -m pip install python-dotenv
+
+# Stasher
+python -m pip install requests lxml python-dotenv
--- a/stash.sh
+++ b/stash.sh
@ -1,59 +0,0 @@
-#!/bin/bash
-
-STASH_IMPORT_DIR=""
-STASH_API_KEY=""
-STASH_HOST=""
-STASH_PORT=0
-STASH_YTDLP_FORMAT="%(title)s [%(id)s].%(ext)s"  # https://github.com/yt-dlp/yt-dlp#output-template
-STASH_PRINT_PREFIX="Get: "
-STASH_URL_FIXERS=()
-
-source .env
-
-TARGET_DIR=$(readlink -m "$STASH_IMPORT_DIR/$(date +%Y%m)")
-mkdir -p $TARGET_DIR
-
-download_file() {
-    FILE_URL="$1"
-
-    extensions="(jpg|JPG|jpeg|JPEG|png|PNG|gif|GIF|mp4|MP4)"
-    rgx_file="^.*\.$extensions$"
-    rgx_filename="[A-Za-z0-9_]*.$extensions"
-    rgx_dbu='http(s?)://.*donmai.us.*/posts/'
-    if [[ $FILE_URL =~ $rgx_dbu ]]; then
-	FILE_URL=$(curl -s "$1" | grep -Eo "http(s?)://.*donmai.us.*/original/[A-Za-z0-9/_]*\.(jpg|jpeg|png|gif|mp4)" | grep '__' -m1)
-    fi
-
-    if [[ $FILE_URL =~ $rgx_file ]]; then
-	echo $STASH_PRINT_PREFIX $(echo $FILE_URL | grep -Eo "$rgx_filename")
-        curl -sO "$FILE_URL" --output-dir "$2/"
-    else
-        echo $STASH_PRINT_PREFIX $FILE_URL
-        yt-dlp $FILE_URL -o "$2/$3"
-    fi
-}
-
-rgx_url='^http(s?):\/\/[-a-zA-Z0-9@:%._\+~#=]{2,256}\.[a-z]{2,6}\b[-a-zA-Z0-9@:%_\+.~#?&\/\/=]*$'
-
-if [[ $1 =~ $rgx_url ]]; then
-    # Download using yt-dlp
-    download_file "$1" "$TARGET_DIR" "$STASH_YTDLP_FORMAT"
-else
-    isFile=$(file -0 "$1" | cut -d $'\0' -f2)
-    case "$isFile" in
-        (*text*)
-            # Download as multiple URLs from the provided source file
-            echo "Reading list of $(wc -l $1 | awk '{print $1}') URL(s)"
-            while read p; do
-                download_file "$p" "$TARGET_DIR" "$STASH_YTDLP_FORMAT"
-            done <$1
-            ;;
-        (*)
-            rsync "$1" "$TARGET_DIR/"
-            ;;
-    esac
-fi
-
-# Update stash
-echo "Updating Stash..."
-curl -S -s -o /dev/null -X POST -H "ApiKey: $STASH_API_KEY" -H "Content-Type: application/json" --data '{ "query": "mutation { metadataScan (input:{useFileMetadata: false})}" }' $STASH_HOST:$STASH_PORT/graphql
--- a/stasher.py
+++ b/stasher.py
@ -0,0 +1,174 @@
+import os
+import re
+import sys
+import requests
+import subprocess
+from lxml import html
+from dotenv import load_dotenv
+
+import argparse
+
+# Load variables from .env file
+load_dotenv()
+
+# Matches a bare http(s) URL; used to tell URL arguments apart from file paths
+url_pattern = r'^http(s?):\/\/[-a-zA-Z0-9@:%._\+~#=]{2,256}\.[a-z]{2,6}\b[-a-zA-Z0-9@:%_\+.~#?&\/\/=]*$'
+
+# STASH_IMPORT_DIR_TEMP, when set and non-empty, takes precedence over STASH_IMPORT_DIR
+STASH_IMPORT_DIR = os.getenv("STASH_IMPORT_DIR_TEMP") or os.getenv("STASH_IMPORT_DIR")
+STASH_API_KEY = os.getenv("STASH_API_KEY")
+STASH_HOST = os.getenv("STASH_HOST")
+STASH_PORT = os.getenv("STASH_PORT")
+# Keep the defaults the previous shell implementation shipped with: without
+# them an unset format makes os.path.join() fail on None and an unset prefix
+# is printed as the literal text "None".
+# yt-dlp output template: https://github.com/yt-dlp/yt-dlp#output-template
+STASH_YTDLP_FORMAT = os.getenv("STASH_YTDLP_FORMAT") or "%(title)s [%(id)s].%(ext)s"
+STASH_PRINT_PREFIX = os.getenv("STASH_PRINT_PREFIX", "Get:")
+
+def find_booru_artist(page_url):
+    """Look up the artist name on a booru post page.
+
+    Fetches page_url, reads the artist link text from one of two known page
+    layouts (tried in order via XPath) and normalises it: every character
+    that is neither alphanumeric nor whitespace becomes "_", and the result
+    is lower-cased and stripped.
+
+    Returns the normalised name, or None when the page cannot be fetched or
+    no artist link is found.
+    """
+    try:
+        # A timeout keeps one unresponsive host from hanging a whole batch
+        response = requests.get(page_url, timeout=30)
+    except requests.exceptions.RequestException as e:
+        print(f"Error: Unable to fetch page from {page_url} ({e})")
+        return None
+
+    if response.status_code != 200:
+        print(f"Error: Unable to fetch page from {page_url}")
+        return None
+
+    # Parse the HTML content
+    tree = html.fromstring(response.content)
+
+    # Extract the artist name using XPath; the second expression is the
+    # fallback for the other supported page layout
+    artist_name = tree.xpath("/html/body/div[1]/section/ul/li[1]/a/text()") or tree.xpath("/html/body/div[1]/div[2]/div/div/aside/section[2]/div/ul[1]/li/a[2]/text()")
+    if not artist_name:
+        print("Warning: Artist name not found on the page.")
+        return None
+
+    # Clean up and format the artist name so it is safe inside a file name
+    artist_name = artist_name[0].strip()
+    artist_name = ''.join(c if c.isalnum() or c.isspace() else '_' for c in artist_name).lower().strip()
+
+    return artist_name
+
+def update_stash():
+    """Ask the Stash server to scan for new items.
+
+    Posts a metadataScan GraphQL mutation to
+    http://STASH_HOST:STASH_PORT/graphql, authenticating with STASH_API_KEY.
+    Prints the outcome; on a non-OK HTTP status or a request failure the
+    process exits with status 1.
+    """
+    print("Running scan for new items in Stash...")
+    url = f"http://{STASH_HOST}:{STASH_PORT}/graphql"
+    headers = {
+        "ApiKey": STASH_API_KEY,
+        "Content-Type": "application/json",
+    }
+    data = '{"query": "mutation { metadataScan (input:{useFileMetadata: false})}" }'
+    try:
+        # Without a timeout an unreachable server would block forever
+        response = requests.post(url, headers=headers, data=data, timeout=30)
+    except requests.exceptions.RequestException as e:
+        print(f"Update error: {e}")
+        # sys.exit rather than the site-provided exit(), which is meant for
+        # interactive use and is not guaranteed to exist
+        sys.exit(1)
+
+    if not response.ok:
+        print(f"Update failed with status code: {response.status_code}")
+        print(response.text)
+        sys.exit(1)
+
+    print("Update successful!")
+
+def download_file(file_url, download_dir, ytdlp_prefix):
+    """Download one URL into download_dir.
+
+    A booru post page (a URL matching rgx_booru) is first resolved to the
+    media URL found in its HTML, and the artist is looked up with
+    find_booru_artist. A URL ending in a known image/video extension is then
+    fetched with requests and saved as "<artist>__<filename>", or just
+    "<filename>" when no artist is known. Any other URL is handed to yt-dlp,
+    with ytdlp_prefix used as the output template inside download_dir.
+
+    An existing destination file is only replaced when "--overwrite" or "-o"
+    is present in sys.argv.
+
+    Returns True when the file was saved (or yt-dlp exited successfully) and
+    False on any failure, including a skipped existing file.
+    """
+    extensions = "(jpg|JPG|jpeg|JPEG|png|PNG|gif|GIF|mp4|MP4)"
+    rgx_file = r"^.*\.{0}$".format(extensions)
+    # Anchored at the end so the name comes from the tail of the URL and not
+    # from an extension-like fragment earlier in it (e.g. in the host name)
+    rgx_filename = r"[A-Za-z0-9_]*\.{0}$".format(extensions)
+    rgx_booru = r'https?://[a-z.]+/(index\.php.*id=([0-9]+)|posts/([0-9]+))'
+    # NOTE(review): the second alternative stops at "/im", so group(1) is a
+    # truncated URL in that case - confirm the intended pattern
+    rgx_booru_v2 = r'(https?://.*/original/([A-Za-z0-9/_]*\.{0})|https?://img[a-z0-9.]+\.[a-z]+\.com/im)'.format(extensions)
+
+    artist = None
+    if re.match(rgx_booru, file_url):
+        artist = find_booru_artist(file_url)
+        print("Artist is:", artist)
+        try:
+            page = requests.get(file_url, timeout=30)
+        except requests.exceptions.RequestException as e:
+            print(f"Error: {e}")
+            return False
+        # The page may contain no media link at all; search() then returns None
+        media = re.search(rgx_booru_v2, page.text)
+        file_url = media.group(1).strip() if media else None
+
+    if not file_url:
+        print("Error: No downloadable URL found.")
+        return False
+
+    print(STASH_PRINT_PREFIX, file_url)
+
+    if re.match(rgx_file, file_url):
+        try:
+            # The context manager releases the streamed connection on every path
+            with requests.get(file_url, stream=True, timeout=30) as response:
+                if response.status_code == 403:
+                    print("Error: HTTP 403 Forbidden - Access to the file is forbidden.")
+                    return False
+                if response.status_code != 200:
+                    print(f"Error: Failed to download the file. Status code: {response.status_code}")
+                    return False
+
+                filename = ((artist + "__") if artist else "") + re.search(rgx_filename, file_url).group(0)
+                download_path = os.path.join(download_dir, filename)
+
+                overwrite = any(arg in sys.argv for arg in ("--overwrite", "-o"))
+                if os.path.isfile(download_path) and not overwrite:
+                    print("Destination file already exists:", filename)
+                    return False
+
+                with open(download_path, 'wb') as f:
+                    for chunk in response.iter_content(8192):
+                        f.write(chunk)
+                print("Saved as:", filename)
+                return True
+        except requests.exceptions.RequestException as e:
+            print(f"Error: {e}")
+            return False
+
+    # Not a direct media link: let yt-dlp extract and download it
+    download_path = os.path.join(download_dir, ytdlp_prefix)
+    command = ['yt-dlp', file_url, '-o', download_path]
+    try:
+        subprocess.run(command, check=True)
+    except (subprocess.CalledProcessError, FileNotFoundError) as e:
+        # FileNotFoundError covers a missing yt-dlp executable
+        print(f"Failed to run yt-dlp command. Error: {e}")
+        return False
+    return True
+
+def find_path_or_url_arg():
+    """Return the first command-line argument that is a path or a URL.
+
+    Walks sys.argv[1:] in order. An argument qualifies when it names an
+    existing filesystem entry or when it matches url_pattern. Returns None
+    when no argument qualifies.
+    """
+    candidates = (
+        candidate
+        for candidate in sys.argv[1:]
+        if os.path.exists(candidate) or re.match(url_pattern, candidate)
+    )
+    return next(candidates, None)
+
+if __name__ == "__main__":
+    # Command-line entry point: download a URL, every URL listed in a text
+    # file, or copy a local file into STASH_IMPORT_DIR, then trigger a scan.
+    parser = argparse.ArgumentParser(description="Download files or update stash.")
+    parser.add_argument("url_or_path", metavar="URL_or_path", nargs="?", help="URL or file path to download")
+    parser.add_argument("-o", "--overwrite", action="store_true", help="Overwrite existing files if present")
+    parser.add_argument("-u", "--update", action="store_true", help="Update stash")
+    parser.add_argument("-n", "--no-update", action="store_true", help="Do not update stash")
+
+    args = parser.parse_args()
+
+    if args.update and args.no_update:
+        print("Conflicting arguments: --update and --no-update cannot be used together.")
+        sys.exit(1)
+
+    if args.update:
+        # --update only triggers the scan; nothing is downloaded
+        update_stash()
+        sys.exit(0)
+
+    url_or_path = args.url_or_path
+    if url_or_path is None:
+        print("Valid URL or file path required")
+        sys.exit(1)
+    elif re.match(url_pattern, url_or_path):
+        # Single URL: download_file decides between a direct fetch and yt-dlp
+        if not download_file(url_or_path, STASH_IMPORT_DIR, STASH_YTDLP_FORMAT):
+            print("Stopped")
+            sys.exit(1)
+    elif not os.path.exists(url_or_path):
+        # Neither a URL nor an existing path: stop before running file/rsync on it
+        print("Valid URL or file path required")
+        sys.exit(1)
+    else:
+        # With -0, file(1) separates the name from the description with a NUL
+        is_file = subprocess.check_output(["file", "-0", url_or_path]).decode().split("\x00")[1]
+        if "text" in is_file:
+            # Download as multiple URLs from the provided source file; read it
+            # once, close it promptly and ignore blank lines
+            with open(url_or_path) as source_file:
+                urls = [line.strip() for line in source_file if line.strip()]
+            print(f"Reading list of {len(urls)} URL(s)")
+            for url in urls:
+                download_file(url, STASH_IMPORT_DIR, STASH_YTDLP_FORMAT)
+        else:
+            # Any other file type is copied into the import directory as-is
+            subprocess.run(["rsync", url_or_path, STASH_IMPORT_DIR], check=True)
+
+    # Update stash
+    if not args.no_update:
+        update_stash()