mirror of
https://gitea.phreedom.club/localhost_frssoft/FMN_bot
synced 2024-11-22 22:09:20 +02:00
Faster conversion of datasets to sqlite
This commit is contained in:
parent
cc2993e59c
commit
09b31528f9
|
@ -12,8 +12,10 @@ conn.commit()
|
||||||
|
|
||||||
|
|
||||||
def convert_tsv_to_db(title_basics_tsv):
|
def convert_tsv_to_db(title_basics_tsv):
|
||||||
'''Конвертирование основного датасета в sqlite базу, выполняется весьма долго (5-10 минут)'''
|
'''Конвертирование основного датасета в sqlite базу, выполняется долго (~5 минут)'''
|
||||||
with gzip.open(title_basics_tsv, mode='rt') as file:
|
with gzip.open(title_basics_tsv, mode='rt') as file:
|
||||||
|
write_dataset = []
|
||||||
|
counter = 0
|
||||||
for line in file:
|
for line in file:
|
||||||
line = line.split("\t")
|
line = line.split("\t")
|
||||||
try:
|
try:
|
||||||
|
@ -33,8 +35,11 @@ def convert_tsv_to_db(title_basics_tsv):
|
||||||
year = None
|
year = None
|
||||||
else:
|
else:
|
||||||
year = int(year)
|
year = int(year)
|
||||||
c.execute("INSERT OR REPLACE INTO titles(tt_id, type, original_name, ru_name, year) VALUES (?, ?, ?, ?, ?)",
|
write_dataset.append((tt_id, tt_type, original_name, ru_name, year))
|
||||||
(tt_id, tt_type, original_name, ru_name, year))
|
counter += 1
|
||||||
|
if counter >= 1000:
|
||||||
|
c.executemany("INSERT OR REPLACE INTO titles(tt_id, type, original_name, ru_name, year) VALUES (?, ?, ?, ?, ?)", write_dataset)
|
||||||
|
write_dataset = []
|
||||||
except Exception as E:
|
except Exception as E:
|
||||||
print(E)
|
print(E)
|
||||||
pass
|
pass
|
||||||
|
@ -43,6 +48,7 @@ def convert_tsv_to_db(title_basics_tsv):
|
||||||
def extract_ru_locale_from_tsv(title_akas_tsv):
|
def extract_ru_locale_from_tsv(title_akas_tsv):
|
||||||
'''Конвертирование датасета с локализованными названиями и последующее добавление в базу'''
|
'''Конвертирование датасета с локализованными названиями и последующее добавление в базу'''
|
||||||
with gzip.open(title_akas_tsv, mode='rt') as file:
|
with gzip.open(title_akas_tsv, mode='rt') as file:
|
||||||
|
ru_name_writer = []
|
||||||
for line in file:
|
for line in file:
|
||||||
line = line.split("\t")
|
line = line.split("\t")
|
||||||
try:
|
try:
|
||||||
|
@ -56,11 +62,13 @@ def extract_ru_locale_from_tsv(title_akas_tsv):
|
||||||
continue
|
continue
|
||||||
ru_name = line[2]
|
ru_name = line[2]
|
||||||
print(ru_name, tt_type)
|
print(ru_name, tt_type)
|
||||||
c.execute("UPDATE titles SET ru_name = ? WHERE tt_id = ?", (ru_name, tt_id))
|
ru_name_writer.append((ru_name, tt_id))
|
||||||
|
|
||||||
except Exception as E:
|
except Exception as E:
|
||||||
print(E)
|
print(E)
|
||||||
pass
|
pass
|
||||||
|
|
||||||
|
c.executemany("UPDATE titles SET ru_name = ? WHERE tt_id = ?", ru_name_writer)
|
||||||
conn.commit()
|
conn.commit()
|
||||||
|
|
||||||
def convert_datasets_to_db():
|
def convert_datasets_to_db():
|
||||||
|
|
Loading…
Reference in New Issue