Better URL recognition for imageboards
This commit is contained in:
parent
cccd548ee1
commit
bb507a2778
|
@ -73,11 +73,17 @@ def download_file(file_url, download_dir, ytdlp_format):
|
||||||
rgx_booru = r'https?://[a-z.]+/(index\.php.*id=([0-9]+)|posts/([0-9]+))'
|
rgx_booru = r'https?://[a-z.]+/(index\.php.*id=([0-9]+)|posts/([0-9]+))'
|
||||||
rgx_booru_v1 = r'(https?://.*/original/([A-Za-z0-9/_]*\.{0})|https?://img[a-z0-9.]+\.[a-z]+\.com/(.*?)))'.format(extensions)
|
rgx_booru_v1 = r'(https?://.*/original/([A-Za-z0-9/_]*\.{0})|https?://img[a-z0-9.]+\.[a-z]+\.com/(.*?)))'.format(extensions)
|
||||||
rgx_booru_v2 = r'(https?://.*/original/([A-Za-z0-9/_]*\.{0})|https?://img[a-z0-9.]+\.[a-z]+\.com/im)'.format(extensions)
|
rgx_booru_v2 = r'(https?://.*/original/([A-Za-z0-9/_]*\.{0})|https?://img[a-z0-9.]+\.[a-z]+\.com/im)'.format(extensions)
|
||||||
|
rgx_booru_v2_vid = r'https?://video-cdn[0-9]{1}.*booru.*\.com/images/.*\.mp4'
|
||||||
|
|
||||||
artist = None
|
artist = None
|
||||||
if re.match(rgx_booru, file_url):
|
if re.match(rgx_booru, file_url):
|
||||||
artist = find_booru_artist(file_url)
|
artist = find_booru_artist(file_url)
|
||||||
file_url = re.search(rgx_booru_v2, requests.get(file_url).text).group(1).strip()
|
booru_url = file_url
|
||||||
|
file_url = re.search(rgx_booru_v2, requests.get(booru_url).text)
|
||||||
|
file_url = file_url.group(1).strip() if file_url else None
|
||||||
|
if not file_url: #try video
|
||||||
|
file_url = re.search(rgx_booru_v2_vid, requests.get(booru_url).text)
|
||||||
|
file_url = file_url.group(0).strip() if file_url else None
|
||||||
|
|
||||||
if not file_url:
|
if not file_url:
|
||||||
return 1
|
return 1
|
||||||
|
|
Loading…
Reference in New Issue