Better URL recognition for imageboards

This commit is contained in:
Thord Johansson 2024-02-28 10:56:27 +01:00
parent cccd548ee1
commit bb507a2778

View File

@@ -73,11 +73,17 @@ def download_file(file_url, download_dir, ytdlp_format):
rgx_booru = r'https?://[a-z.]+/(index\.php.*id=([0-9]+)|posts/([0-9]+))' rgx_booru = r'https?://[a-z.]+/(index\.php.*id=([0-9]+)|posts/([0-9]+))'
rgx_booru_v1 = r'(https?://.*/original/([A-Za-z0-9/_]*\.{0})|https?://img[a-z0-9.]+\.[a-z]+\.com/(.*?)))'.format(extensions) rgx_booru_v1 = r'(https?://.*/original/([A-Za-z0-9/_]*\.{0})|https?://img[a-z0-9.]+\.[a-z]+\.com/(.*?)))'.format(extensions)
rgx_booru_v2 = r'(https?://.*/original/([A-Za-z0-9/_]*\.{0})|https?://img[a-z0-9.]+\.[a-z]+\.com/im)'.format(extensions) rgx_booru_v2 = r'(https?://.*/original/([A-Za-z0-9/_]*\.{0})|https?://img[a-z0-9.]+\.[a-z]+\.com/im)'.format(extensions)
rgx_booru_v2_vid = r'https?://video-cdn[0-9]{1}.*booru.*\.com/images/.*\.mp4'
artist = None artist = None
if re.match(rgx_booru, file_url): if re.match(rgx_booru, file_url):
artist = find_booru_artist(file_url) artist = find_booru_artist(file_url)
file_url = re.search(rgx_booru_v2, requests.get(file_url).text).group(1).strip() booru_url = file_url
file_url = re.search(rgx_booru_v2, requests.get(booru_url).text)
file_url = file_url.group(1).strip() if file_url else None
if not file_url: #try video
file_url = re.search(rgx_booru_v2_vid, requests.get(booru_url).text)
file_url = file_url.group(0).strip() if file_url else None
if not file_url: if not file_url:
return 1 return 1