Better URL recognition for imageboards
This commit is contained in:
parent
cccd548ee1
commit
bb507a2778
|
@ -73,11 +73,17 @@ def download_file(file_url, download_dir, ytdlp_format):
|
|||
rgx_booru = r'https?://[a-z.]+/(index\.php.*id=([0-9]+)|posts/([0-9]+))'
|
||||
rgx_booru_v1 = r'(https?://.*/original/([A-Za-z0-9/_]*\.{0})|https?://img[a-z0-9.]+\.[a-z]+\.com/(.*?)))'.format(extensions)
|
||||
rgx_booru_v2 = r'(https?://.*/original/([A-Za-z0-9/_]*\.{0})|https?://img[a-z0-9.]+\.[a-z]+\.com/im)'.format(extensions)
|
||||
rgx_booru_v2_vid = r'https?://video-cdn[0-9]{1}.*booru.*\.com/images/.*\.mp4'
|
||||
|
||||
artist = None
|
||||
if re.match(rgx_booru, file_url):
|
||||
artist = find_booru_artist(file_url)
|
||||
file_url = re.search(rgx_booru_v2, requests.get(file_url).text).group(1).strip()
|
||||
booru_url = file_url
|
||||
file_url = re.search(rgx_booru_v2, requests.get(booru_url).text)
|
||||
file_url = file_url.group(1).strip() if file_url else None
|
||||
if not file_url: #try video
|
||||
file_url = re.search(rgx_booru_v2_vid, requests.get(booru_url).text)
|
||||
file_url = file_url.group(0).strip() if file_url else None
|
||||
|
||||
if not file_url:
|
||||
return 1
|
||||
|
|
Loading…
Reference in New Issue