mirror of
https://gitea.phreedom.club/localhost_frssoft/FMN_bot
synced 2024-11-22 16:19:22 +02:00
Fix dataset converter (year parsing was partially broken)
This commit is contained in:
parent
e10b6da0e0
commit
6d30acedd0
|
@ -16,6 +16,8 @@ def convert_tsv_to_db(title_basics_tsv):
|
||||||
with gzip.open(title_basics_tsv, mode='rt') as file:
|
with gzip.open(title_basics_tsv, mode='rt') as file:
|
||||||
write_dataset = []
|
write_dataset = []
|
||||||
counter = 0
|
counter = 0
|
||||||
|
chunk = 1000
|
||||||
|
progress_counter = 0
|
||||||
for line in file:
|
for line in file:
|
||||||
line = line.split("\t")
|
line = line.split("\t")
|
||||||
try:
|
try:
|
||||||
|
@ -24,23 +26,23 @@ def convert_tsv_to_db(title_basics_tsv):
|
||||||
original_name = line[3]
|
original_name = line[3]
|
||||||
ru_name = None
|
ru_name = None
|
||||||
year = line[5]
|
year = line[5]
|
||||||
|
if year.startswith(r"\N"):
|
||||||
if tt_type not in ("movie", "video"):
|
|
||||||
original_name = None
|
|
||||||
year = "\\N"
|
|
||||||
else:
|
|
||||||
print(tt_id, tt_type, original_name, ru_name, year)
|
|
||||||
|
|
||||||
if year == "\\N":
|
|
||||||
year = None
|
year = None
|
||||||
else:
|
else:
|
||||||
year = int(year)
|
year = int(year)
|
||||||
|
|
||||||
|
if tt_type not in ("movie", "video"):
|
||||||
|
original_name = None
|
||||||
|
year = None
|
||||||
|
|
||||||
write_dataset.append((tt_id, tt_type, original_name, ru_name, year))
|
write_dataset.append((tt_id, tt_type, original_name, ru_name, year))
|
||||||
counter += 1
|
counter += 1
|
||||||
if counter >= 1000:
|
if counter >= chunk:
|
||||||
c.executemany("INSERT OR REPLACE INTO titles(tt_id, type, original_name, ru_name, year) VALUES (?, ?, ?, ?, ?)", write_dataset)
|
c.executemany("INSERT OR REPLACE INTO titles(tt_id, type, original_name, ru_name, year) VALUES (?, ?, ?, ?, ?)", write_dataset)
|
||||||
write_dataset = []
|
write_dataset = []
|
||||||
counter = 0
|
counter = 0
|
||||||
|
progress_counter += chunk
|
||||||
|
print(f'Обработано: {progress_counter}')
|
||||||
except Exception as E:
|
except Exception as E:
|
||||||
print(E)
|
print(E)
|
||||||
pass
|
pass
|
||||||
|
@ -50,6 +52,7 @@ def extract_ru_locale_from_tsv(title_akas_tsv):
|
||||||
'''Конвертирование датасета с локализованными названиями и последующее добавление в базу'''
|
'''Конвертирование датасета с локализованными названиями и последующее добавление в базу'''
|
||||||
with gzip.open(title_akas_tsv, mode='rt') as file:
|
with gzip.open(title_akas_tsv, mode='rt') as file:
|
||||||
ru_name_writer = []
|
ru_name_writer = []
|
||||||
|
counter = 0
|
||||||
for line in file:
|
for line in file:
|
||||||
line = line.split("\t")
|
line = line.split("\t")
|
||||||
try:
|
try:
|
||||||
|
@ -62,8 +65,9 @@ def extract_ru_locale_from_tsv(title_akas_tsv):
|
||||||
if tt_type not in ("movie", "video"):
|
if tt_type not in ("movie", "video"):
|
||||||
continue
|
continue
|
||||||
ru_name = line[2]
|
ru_name = line[2]
|
||||||
print(ru_name, tt_type)
|
|
||||||
ru_name_writer.append((ru_name, tt_id))
|
ru_name_writer.append((ru_name, tt_id))
|
||||||
|
counter += 1
|
||||||
|
print(f'Обработано ru_name: {counter}')
|
||||||
|
|
||||||
except Exception as E:
|
except Exception as E:
|
||||||
print(E)
|
print(E)
|
||||||
|
|
Loading…
Reference in New Issue