Removed stash script in favour of rewritten Python version (with help from ChatGPT)
This commit is contained in:
parent
c641bd35e7
commit
23335cba3e
3 changed files with 212 additions and 6 deletions
|
@ -1,6 +1,6 @@
|
|||
STASH_IMPORT_DIR=
|
||||
STASH_API_KEY=
|
||||
STASH_YTDLP_FORMAT=
|
||||
STASH_YTDLP_FORMAT="%(title)s [%(id)s].%(ext)s"
|
||||
STASH_HOST=
|
||||
STASH_PORT=
|
||||
FANCTL_SERVO_PIN=
|
||||
|
|
42
stash.sh
42
stash.sh
|
@ -13,31 +13,64 @@ source .env
|
|||
TARGET_DIR=$(readlink -m "$STASH_IMPORT_DIR/$(date +%Y%m)")
|
||||
mkdir -p $TARGET_DIR
|
||||
|
||||
update_stash() {
    # Ask the Stash server to scan for new items by POSTing a metadataScan
    # GraphQL mutation. Reads STASH_API_KEY, STASH_HOST and STASH_PORT.
    # The exit status is curl's exit status.
    echo "Running scan for new items in Stash..."
    # The endpoint is quoted so an empty or unusual host/port value cannot be
    # word-split or glob-expanded into separate curl arguments.
    curl -S -s -o /dev/null -X POST \
        -H "ApiKey: $STASH_API_KEY" \
        -H "Content-Type: application/json" \
        --data '{ "query": "mutation { metadataScan (input:{useFileMetadata: false})}" }' \
        "$STASH_HOST:$STASH_PORT/graphql"
}
|
||||
|
||||
find_booru_artist() {
    # Print the artist of a booru post as a lowercase slug followed by "__".
    #   $1 - URL of the post page
    # Prints nothing when no artist could be extracted from the page.
    # TODO: Reduce html to just name
    PAGE_URL="$1"
    ARTIST_NAME=$(curl -s "$PAGE_URL" | xmllint --format --html -xpath "/html/body/div[1]/section/ul/li[1]/a/text()" - 2>/dev/null)
    # $ARTIST_NAME is deliberately unquoted here so runs of whitespace collapse
    # to single spaces. Spaces are kept by the first tr so that the last tr can
    # turn them into underscores (previously they were deleted up front, which
    # made the final tr a no-op).
    ARTIST_NAME=$(echo $ARTIST_NAME | tr -dc '[:alnum:] \n\r' | tr '[:upper:]' '[:lower:]' | tr ' ' '_')
    # Braces are required: "$ARTIST_NAME__" would expand the (unset) variable
    # named ARTIST_NAME__ and always print an empty line.
    if [ -n "$ARTIST_NAME" ]; then
        echo "${ARTIST_NAME}__"
    fi
}
|
||||
|
||||
download_file() {
    # Download a single URL into a directory.
    #   $1 - direct media URL, booru post page URL, or any URL yt-dlp accepts
    #   $2 - destination directory
    #   $3 - yt-dlp output template (only used by the yt-dlp fallback)
    # Returns 1 when no media URL could be resolved from a booru page,
    # otherwise the exit status of curl or yt-dlp.
    FILE_URL="$1"

    extensions="(jpg|JPG|jpeg|JPEG|png|PNG|gif|GIF|mp4|MP4)"
    rgx_file="^.*\.$extensions$"
    # The dot is escaped so it only matches the literal extension separator.
    rgx_filename="[A-Za-z0-9_]*\.$extensions"
    rgx_booru='https?://[a-z.]+/(index\.php.*id=([0-9]+)|posts/([0-9]+))'
    rgx_booru_v2='(https?://.*/original/([A-Za-z0-9/_]*\.(jpg|jpeg|png|gif|mp4))|https?://img[a-z0-9.]+\.[a-z]+\.com/images/([0-9a-z]+/)+([_0-9a-z]+\.(jpg|jpeg|png|gif|mp4)))'

    # Booru post page: look up the artist, then replace the page URL with the
    # first media URL found in the page source.
    if [[ $FILE_URL =~ $rgx_booru ]]; then
        ARTIST=$(find_booru_artist "$FILE_URL")
        echo "Artist is: $ARTIST"
        FILE_URL=$(curl -s "$FILE_URL" | grep -Eo "$rgx_booru_v2" -m1 | head -1)
    fi

    # Nothing usable was found on the page.
    if [ -z "$FILE_URL" ]; then
        return 1
    fi

    if [[ $FILE_URL =~ $rgx_file ]]; then
        # Direct link to an image/video file: fetch it with curl, keeping the
        # remote file name.
        echo $STASH_PRINT_PREFIX $(echo "$FILE_URL" | grep -Eo "$rgx_filename")
        curl -sO "$FILE_URL" --output-dir "$2/"
        return $?
    else
        # Anything else is handed to yt-dlp.
        echo $STASH_PRINT_PREFIX "$FILE_URL"
        yt-dlp "$FILE_URL" -o "$2/$3"
        return $?
    fi
}
|
||||
|
||||
if [ $1 == "--update" ]; then
|
||||
update_stash
|
||||
exit $?
|
||||
fi
|
||||
|
||||
rgx_url='^http(s?):\/\/[-a-zA-Z0-9@:%._\+~#=]{2,256}\.[a-z]{2,6}\b[-a-zA-Z0-9@:%_\+.~#?&\/\/=]*$'
|
||||
|
||||
if [[ $1 =~ $rgx_url ]]; then
|
||||
# Download using yt-dlp
|
||||
download_file "$1" "$TARGET_DIR" "$STASH_YTDLP_FORMAT"
|
||||
if [ $? -ne 0 ]; then
|
||||
echo "Stopped"
|
||||
exit 1
|
||||
fi
|
||||
else
|
||||
isFile=$(file -0 "$1" | cut -d $'\0' -f2)
|
||||
case "$isFile" in
|
||||
|
@ -55,5 +88,4 @@ else
|
|||
fi
|
||||
|
||||
# Update stash
|
||||
echo "Updating Stash..."
|
||||
curl -S -s -o /dev/null -X POST -H "ApiKey: $STASH_API_KEY" -H "Content-Type: application/json" --data '{ "query": "mutation { metadataScan (input:{useFileMetadata: false})}" }' $STASH_HOST:$STASH_PORT/graphql
|
||||
update_stash
|
||||
|
|
174
stasher.py
Normal file
174
stasher.py
Normal file
|
@ -0,0 +1,174 @@
|
|||
import os
|
||||
import re
|
||||
import sys
|
||||
import requests
|
||||
import subprocess
|
||||
from lxml import html
|
||||
from dotenv import load_dotenv
|
||||
|
||||
import argparse
|
||||
|
||||
# Read the .env file first so the lookups below can see its values.
load_dotenv()

# Regular expression used to decide whether an argument is an http(s) URL.
url_pattern = r'^http(s?):\/\/[-a-zA-Z0-9@:%._\+~#=]{2,256}\.[a-z]{2,6}\b[-a-zA-Z0-9@:%_\+.~#?&\/\/=]*$'

# Settings taken from the environment; each one is None when it is not set.
# STASH_IMPORT_DIR_TEMP, when set and non-empty, wins over STASH_IMPORT_DIR.
STASH_IMPORT_DIR = (
    os.environ.get("STASH_IMPORT_DIR_TEMP")
    or os.environ.get("STASH_IMPORT_DIR")
)
STASH_API_KEY = os.environ.get("STASH_API_KEY")
STASH_HOST = os.environ.get("STASH_HOST")
STASH_PORT = os.environ.get("STASH_PORT")
STASH_YTDLP_FORMAT = os.environ.get("STASH_YTDLP_FORMAT")
STASH_PRINT_PREFIX = os.environ.get("STASH_PRINT_PREFIX")
|
||||
|
||||
def find_booru_artist(page_url):
    """Return the artist name scraped from a booru post page.

    The page is fetched over HTTP and the name is read from one of two
    hard-coded XPath locations. The first text node found is lower-cased,
    and every character that is neither alphanumeric nor whitespace is
    replaced with an underscore.

    Args:
        page_url: URL of the post page to inspect.

    Returns:
        The cleaned artist name, or None when the page cannot be fetched,
        the name is not found, or the cleaned name is empty.
    """
    try:
        # A timeout keeps a stalled server from hanging the script forever.
        response = requests.get(page_url, timeout=30)
    except requests.exceptions.RequestException as e:
        # Network failures are reported the same way as a bad status code
        # instead of propagating into the caller.
        print(f"Error: Unable to fetch page from {page_url}: {e}")
        return None

    if response.status_code != 200:
        print(f"Error: Unable to fetch page from {page_url}")
        return None

    # Parse the HTML content
    tree = html.fromstring(response.content)

    # Extract the artist name using XPath; the second expression is only
    # evaluated when the first one yields nothing.
    artist_name = (
        tree.xpath("/html/body/div[1]/section/ul/li[1]/a/text()")
        or tree.xpath("/html/body/div[1]/div[2]/div/div/aside/section[2]/div/ul[1]/li/a[2]/text()")
    )
    if not artist_name:
        print("Warning: Artist name not found on the page.")
        return None

    # Clean up and format the artist name
    artist_name = artist_name[0].strip()
    artist_name = ''.join(
        c if c.isalnum() or c.isspace() else '_' for c in artist_name
    ).lower().strip()

    # An empty result is reported as "no artist" rather than as "".
    return artist_name or None
|
||||
|
||||
def update_stash():
    """Ask the Stash server to scan for new items.

    POSTs a ``metadataScan`` GraphQL mutation to
    ``http://STASH_HOST:STASH_PORT/graphql`` with STASH_API_KEY in the
    ``ApiKey`` header.

    Raises:
        SystemExit: with status 1 when the request fails or the server
            answers with a non-success HTTP status.
    """
    print("Running scan for new items in Stash...")
    url = f"http://{STASH_HOST}:{STASH_PORT}/graphql"
    headers = {
        "ApiKey": STASH_API_KEY,
        "Content-Type": "application/json",
    }
    data = '{"query": "mutation { metadataScan (input:{useFileMetadata: false})}" }'

    try:
        # A timeout keeps an unreachable server from blocking forever.
        response = requests.post(url, headers=headers, data=data, timeout=30)
    except requests.exceptions.RequestException as e:
        print(f"Update error: {e}")
        # sys.exit is used because the bare exit() helper is injected by the
        # site module and is meant for interactive sessions.
        sys.exit(1)

    if not response.ok:
        print(f"Update failed with status code: {response.status_code}")
        print(response.text)
        sys.exit(1)

    print("Update successful!")
|
||||
|
||||
def download_file(file_url, download_dir, ytdlp_prefix):
    """Download one URL into ``download_dir``.

    Three kinds of URL are handled:

    * A booru post page (``.../posts/<id>`` or ``.../index.php...id=<id>``):
      the artist is looked up with ``find_booru_artist`` and the page is
      searched for the first media URL, which then replaces ``file_url``.
    * A direct link ending in an image/video extension: the file is
      streamed to disk, prefixed with ``<artist>__`` when an artist is known.
      An existing file is kept unless ``--overwrite`` or ``-o`` is present
      in ``sys.argv``.
    * Anything else is handed to the ``yt-dlp`` command.

    Args:
        file_url: URL to download.
        download_dir: Directory the file is written to.
        ytdlp_prefix: yt-dlp output template, joined onto ``download_dir``.

    Returns:
        True when a file was downloaded, False otherwise.
    """
    extensions = "(jpg|JPG|jpeg|JPEG|png|PNG|gif|GIF|mp4|MP4)"
    rgx_file = rf"^.*\.{extensions}$"
    rgx_filename = rf"[A-Za-z0-9_]*\.{extensions}"
    rgx_booru = r'https?://[a-z.]+/(index\.php.*id=([0-9]+)|posts/([0-9]+))'
    # Matches either an ".../original/<file>" URL or an
    # "img*.<site>.com/images/<dirs>/<file>" URL in the page source.
    rgx_booru_v2 = (
        rf'(https?://.*/original/([A-Za-z0-9/_]*\.{extensions})'
        rf'|https?://img[a-z0-9.]+\.[a-z]+\.com/images/([0-9a-z]+/)+([_0-9a-z]+\.{extensions}))'
    )

    artist = None
    if re.match(rgx_booru, file_url):
        artist = find_booru_artist(file_url)
        print("Artist is:", artist)
        try:
            page = requests.get(file_url, timeout=30)
        except requests.exceptions.RequestException as e:
            print(f"Error: {e}")
            return False
        # re.search returns None when the page holds no media URL; that must
        # become an empty URL rather than an AttributeError.
        media = re.search(rgx_booru_v2, page.text)
        file_url = media.group(0).strip() if media else ""

    if not file_url:
        # Must be falsy: callers test the result with ``not``.
        return False

    if not re.match(rgx_file, file_url):
        # Not a direct media link: let yt-dlp work out how to fetch it.
        print(STASH_PRINT_PREFIX, file_url)
        download_path = os.path.join(download_dir, ytdlp_prefix)
        command = ['yt-dlp', file_url, '-o', download_path]
        try:
            subprocess.run(command, check=True)
        except (subprocess.CalledProcessError, OSError) as e:
            # OSError covers a missing or non-executable yt-dlp binary.
            print(f"Failed to run yt-dlp command. Error: {e}")
            return False
        return True

    print(STASH_PRINT_PREFIX, file_url)
    try:
        # The context manager releases the streamed connection on every
        # return path below.
        with requests.get(file_url, stream=True, timeout=30) as response:
            if response.status_code == 403:
                print("Error: HTTP 403 Forbidden - Access to the file is forbidden.")
                return False
            if response.status_code != 200:
                print(f"Error: Failed to download the file. Status code: {response.status_code}")
                return False

            prefix = (artist + "__") if artist else ""
            filename = prefix + re.search(rgx_filename, file_url).group(0)
            download_path = os.path.join(download_dir, filename)

            overwrite = any(arg in sys.argv for arg in ("--overwrite", "-o"))
            if os.path.isfile(download_path) and not overwrite:
                print("Destination file already exists:", filename)
                return False

            with open(download_path, 'wb') as f:
                for chunk in response.iter_content(8192):
                    f.write(chunk)
            print("Saved as:", filename)
            return True
    except requests.exceptions.RequestException as e:
        print(f"Error: {e}")
        return False
|
||||
|
||||
def find_path_or_url_arg():  # chatgpt
    """Return the first command-line argument that is a path or a URL.

    The arguments after the program name are examined in order. An argument
    qualifies when it names an existing filesystem entry or, failing that,
    matches ``url_pattern``.

    Returns:
        The first qualifying argument, or None when there is none.
    """
    candidates = (
        arg
        for arg in sys.argv[1:]
        # The path check runs first; the regex is only consulted when the
        # argument is not an existing path.
        if os.path.exists(arg) or re.match(url_pattern, arg)
    )
    return next(candidates, None)
|
||||
|
||||
if __name__ == "__main__":
    # Command-line entry point. The positional argument is treated as:
    #   * a URL            -> downloaded with download_file
    #   * a text file      -> read as a list of URLs, one per line
    #   * any other path   -> copied into STASH_IMPORT_DIR with rsync
    # Afterwards a Stash scan is triggered unless --no-update was given.
    parser = argparse.ArgumentParser(description="Download files or update stash.")
    parser.add_argument("url_or_path", metavar="URL_or_path", nargs="?", help="URL or file path to download")
    parser.add_argument("-o", "--overwrite", action="store_true", help="Overwrite existing files if present")
    parser.add_argument("-u", "--update", action="store_true", help="Update stash")
    parser.add_argument("-n", "--no-update", action="store_true", help="Do not update stash")

    args = parser.parse_args()

    if args.update and args.no_update:
        print("Conflicting arguments: --update and --no-update cannot be used together.")
        sys.exit(1)

    # --update only triggers the scan; any URL/path argument is ignored.
    if args.update:
        update_stash()
        sys.exit(0)

    url_or_path = args.url_or_path
    if url_or_path is None:
        print("Valid URL or file path required")
        sys.exit(1)

    if re.match(url_pattern, url_or_path):
        # Single URL: download it and stop on failure.
        if not download_file(url_or_path, STASH_IMPORT_DIR, STASH_YTDLP_FORMAT):
            print("Stopped")
            sys.exit(1)
    elif not os.path.exists(url_or_path):
        # Neither a URL nor an existing path: fail with the usage error
        # instead of passing the argument on to `file` and rsync.
        print("Valid URL or file path required")
        sys.exit(1)
    else:
        # Ask the `file` utility what kind of file this is; with -0 the
        # file name and the description are separated by a NUL byte.
        is_file = subprocess.check_output(["file", "-0", url_or_path]).decode().split("\x00")[1]
        if "text" in is_file:
            # Download as multiple URLs from the provided source file.
            # The file is read once and closed; blank lines are skipped.
            with open(url_or_path) as source_file:
                urls = [line.strip() for line in source_file if line.strip()]
            print(f"Reading list of {len(urls)} URL(s)")
            for url in urls:
                download_file(url, STASH_IMPORT_DIR, STASH_YTDLP_FORMAT)
        else:
            subprocess.run(["rsync", url_or_path, STASH_IMPORT_DIR], check=True)

    # Update stash
    if not args.no_update:
        update_stash()
|
Loading…
Reference in a new issue