mirror of
https://gitea.phreedom.club/localhost_frssoft/FMN_bot
synced 2024-12-23 04:43:34 +02:00
Faster conversion of datasets to SQLite
This commit is contained in:
parent
cc2993e59c
commit
09b31528f9
1 changed file with 13 additions and 5 deletions
|
@ -12,8 +12,10 @@ conn.commit()
|
|||
|
||||
|
||||
def convert_tsv_to_db(title_basics_tsv):
|
||||
'''Конвертирование основного датасета в sqlite базу, выполняется весьма долго (5-10 минут)'''
|
||||
'''Конвертирование основного датасета в sqlite базу, выполняется долго (~5 минут)'''
|
||||
with gzip.open(title_basics_tsv, mode='rt') as file:
|
||||
write_dataset = []
|
||||
counter = 0
|
||||
for line in file:
|
||||
line = line.split("\t")
|
||||
try:
|
||||
|
@ -33,16 +35,20 @@ def convert_tsv_to_db(title_basics_tsv):
|
|||
year = None
|
||||
else:
|
||||
year = int(year)
|
||||
c.execute("INSERT OR REPLACE INTO titles(tt_id, type, original_name, ru_name, year) VALUES (?, ?, ?, ?, ?)",
|
||||
(tt_id, tt_type, original_name, ru_name, year))
|
||||
write_dataset.append((tt_id, tt_type, original_name, ru_name, year))
|
||||
counter += 1
|
||||
if counter >= 1000:
|
||||
c.executemany("INSERT OR REPLACE INTO titles(tt_id, type, original_name, ru_name, year) VALUES (?, ?, ?, ?, ?)", write_dataset)
|
||||
write_dataset = []
|
||||
except Exception as E:
|
||||
print(E)
|
||||
pass
|
||||
conn.commit()
|
||||
conn.commit()
|
||||
|
||||
def extract_ru_locale_from_tsv(title_akas_tsv):
|
||||
'''Конвертирование датасета с локализованными названиями и последующее добавление в базу'''
|
||||
with gzip.open(title_akas_tsv, mode='rt') as file:
|
||||
ru_name_writer = []
|
||||
for line in file:
|
||||
line = line.split("\t")
|
||||
try:
|
||||
|
@ -56,11 +62,13 @@ def extract_ru_locale_from_tsv(title_akas_tsv):
|
|||
continue
|
||||
ru_name = line[2]
|
||||
print(ru_name, tt_type)
|
||||
c.execute("UPDATE titles SET ru_name = ? WHERE tt_id = ?", (ru_name, tt_id))
|
||||
ru_name_writer.append((ru_name, tt_id))
|
||||
|
||||
except Exception as E:
|
||||
print(E)
|
||||
pass
|
||||
|
||||
c.executemany("UPDATE titles SET ru_name = ? WHERE tt_id = ?", ru_name_writer)
|
||||
conn.commit()
|
||||
|
||||
def convert_datasets_to_db():
|
||||
|
|
Loading…
Reference in a new issue