From 68252d61023e1cf398cca455d9818bf5ae8c1dce Mon Sep 17 00:00:00 2001
From: Your Name
Date: Thu, 19 Dec 2024 21:52:26 +0300
Subject: [PATCH] theme finder in separated definition

---
Review notes (ignored by git-am):
* find_infect_theme(): iterate with enumerate() instead of tuple.index(),
  return the matched theme directly, return ([], None) explicitly when no
  theme matches, add a docstring and English comments.
* Indentation of context/removed lines was reconstructed from a
  whitespace-collapsed copy; if the patch does not apply cleanly, try
  `git apply --ignore-whitespace`. The post-image blob id on the `index`
  line predates these review changes.

 src/autobioebbing.py | 179 ++++++++++++++++++++++++-------------------
 1 file changed, 97 insertions(+), 82 deletions(-)

diff --git a/src/autobioebbing.py b/src/autobioebbing.py
index 0dedb10..61ba731 100644
--- a/src/autobioebbing.py
+++ b/src/autobioebbing.py
@@ -10,6 +10,99 @@ import random
 import re
 
 
+def find_infect_theme(text):
+    """Detect which bio-attack message theme ``text`` is written in.
+
+    Themes are tried in declaration order and the first one whose
+    'infected' pattern matches wins.
+
+    Returns a ``(matches, theme)`` tuple: ``matches`` is the ``re.findall``
+    result of the winning 'infected' pattern and ``theme`` is the 4-tuple of
+    patterns it belongs to. If no theme matches, ``([], None)`` is returned.
+    """
+    # NOTE: theme hell... any ideas for improvement are welcome,
+    # but do not use one huge regular expression like|that|way|a|aaaa|aaaaaaaa
+    # because it turns the re.findall result into a mess...
+    default_bioexpr_theme = r"Прибыль: ([0-9\.\,k]+)"
+    default_infected_days_theme = r' на ([0-9\ ]+) д.*'
+    default_pathogen_remaining_theme = r'Осталось: ([0-9\ ]+)'
+    # More themes make the scan slower, but that is acceptable here.
+    # First match wins, so a specific theme listed after a more general one
+    # (e.g. ' подверг заморозке' after ' подверг') is never selected; such
+    # pairs currently share the same sub-patterns, so parsing is unaffected.
+    bio_attack_themes = (
+        # current order in theme:
+        # ('infected', 'bio_expr', 'infected days', 'pathogen remaining')
+        # UA theme
+        (r'.* йобнув.+',
+         r"([0-9\.\,k]+) біо-ресурса",
+         default_infected_days_theme,
+         default_pathogen_remaining_theme),
+        # RU theme
+        (r'.* подверг.+',
+         default_bioexpr_theme,
+         default_infected_days_theme,
+         default_pathogen_remaining_theme),
+        # EN theme
+        (r'.* infected.+',
+         r"([0-9\.\,k]+) pcs\.",
+         r' for ([0-9\ ]+) d.*',
+         r'Remaining: ([0-9\ ]+)'),
+        # AZ theme
+        (r'.* сикди.+',
+         r"верир: ([0-9\.\,k]+)",
+         default_infected_days_theme,
+         default_pathogen_remaining_theme),
+        # "PC geek" theme
+        (r'.* насрал.+',
+         r"потеряет: ([0-9\.\,k]+)",
+         default_infected_days_theme,
+         default_pathogen_remaining_theme),
+        # "New Year" theme
+        (r'.* подверг заморозке.+',
+         default_bioexpr_theme,
+         default_infected_days_theme,
+         default_pathogen_remaining_theme),
+        # "Sex industry" theme
+        (r'.*.+выебал.+',
+         r"кончила ([0-9\.\,k]+)",
+         r' ещё ([0-9\ ]+) д.*',
+         default_pathogen_remaining_theme),
+        # "Swindlers on the net" theme
+        (r'.* атаковал.+',
+         r"приносит: ([0-9\.\,k]+)",
+         default_infected_days_theme,
+         default_pathogen_remaining_theme),
+        # UA theme [via trust]
+        (r'.* за допомогою довіреності зазнала зараження.+',
+         r"([0-9\.\,k]+) біо-ресурса",
+         default_infected_days_theme,
+         default_pathogen_remaining_theme),
+        # RU theme [via trust]
+        (r'.* при помощи доверенности подвергла заражению.+',
+         default_bioexpr_theme,
+         default_infected_days_theme,
+         default_pathogen_remaining_theme),
+        # EN theme [via trust]
+        (r'.* by authorization infected.+',
+         r"([0-9\.\,k]+) pcs\.",
+         r' for ([0-9\ ]+) d.*',
+         r'Remaining: ([0-9\ ]+)'),
+        # unidentified theme [via trust]
+        (r'.* при помощи анонимуса атаковала.+',
+         r'приносит: ([0-9\.\,k]+)',
+         default_infected_days_theme,
+         default_pathogen_remaining_theme),
+    )
+    for theme_index, theme in enumerate(bio_attack_themes):
+        logger.debug(f'trying theme {theme_index}...')
+        r = re.findall(theme[0], text)
+        if r:
+            logger.debug(f'found theme {theme_index}')
+            return r, theme
+    return [], None
+
+
 async def eb(client, c, conn, con, d, get_id, my_id, message_q):
     @client.on(events.NewMessage(
         pattern='.*йобнув.*|.*подверг(ла)?.*|.*infected.*|.*сикди.*|.*насрал.*|.*выебал.*|.*за допомогою довіреності.*|.*by authorization infected.*|.*при помощи анонимуса атаковала.*',
@@ -24,87 +117,10 @@ async def eb(client, c, conn, con, d, get_id, my_id, message_q):
         logger.debug(f"in chat '{chat_name}'")
         states.stats_most_infect_spam_chats[chat_name] += 1
         t = m.raw_text
-        # NOTE: theme hell... any ideas for improvment required
-        # but not use huge regular expression like|that|fuckin|way|a|aaaa|aaaaaaaa
-        # because it makes re.findall like mess...
-        default_bioexpr_theme = r"Прибыль: ([0-9\.\,k]+)"
-        default_infected_days_theme = r' на ([0-9\ ]+) д.*'
-        default_pathogen_remaining_theme = r'Осталось: ([0-9\ ]+)'
-        bio_attack_themes = (  # I guess if too many themes it will be slow, but acceptable, because python slow as is.
-            # current order in theme:
-            # ('infected', 'bio_expr', 'infected days', 'pathogen remaining')
-            # UA theme
-            (r'.* йобнув.+',
-             r"([0-9\.\,k]+) біо-ресурса",
-             default_infected_days_theme,
-             default_pathogen_remaining_theme),
-            # RU theme
-            (r'.* подверг.+',
-             default_bioexpr_theme,
-             default_infected_days_theme,
-             default_pathogen_remaining_theme),
-            # EN theme
-            (r'.* infected.+',
-             r"([0-9\.\,k]+) pcs\.",
-             r' for ([0-9\ ]+) d.*',
-             r'Remaining: ([0-9\ ]+)'),
-            # AZ theme
-            (r'.* сикди.+',
-             r"верир: ([0-9\.\,k]+)",
-             default_infected_days_theme,
-             default_pathogen_remaining_theme),
-            # "ПК гик" theme
-            (r'.* насрал.+',
-             r"потеряет: ([0-9\.\,k]+)",
-             default_infected_days_theme,
-             default_pathogen_remaining_theme),
-            # "Новогодняя" theme
-            (r'.* подверг заморозке.+',
-             default_bioexpr_theme,
-             default_infected_days_theme,
-             default_pathogen_remaining_theme),
-            # "Сексуальная индустрия" theme
-            (r'.*.+выебал.+',
-             r"кончила ([0-9\.\,k]+)",
-             r' ещё ([0-9\ ]+) д.*',
-             default_pathogen_remaining_theme),
-            # "Аферисты в сетях" theme
-            (r'.* атаковал.+',
-             r"приносит: ([0-9\.\,k]+)",
-             default_infected_days_theme,
-             default_pathogen_remaining_theme),
-            # UA theme [via trust]
-            (r'.* за допомогою довіреності зазнала зараження.+',
-             r"([0-9\.\,k]+) біо-ресурса",
-             default_infected_days_theme,
-             default_pathogen_remaining_theme),
-            # RU theme [via trust]
-            (r'.* при помощи доверенности подвергла заражению.+',
-             default_bioexpr_theme,
-             default_infected_days_theme,
-             default_pathogen_remaining_theme),
-            # EN theme [via trust]
-            (r'.* by authorization infected.+',
-             r"([0-9\.\,k]+) pcs\.",
-             r' for ([0-9\ ]+) d.*',
-             r'Remaining: ([0-9\ ]+)'),
-            # idk what is theme [via trust]
-            (r'.* при помощи анонимуса атаковала.+',
-             r'приносит: ([0-9\.\,k]+)',
-             default_infected_days_theme,
-             default_pathogen_remaining_theme),
-        )
-
         if len(m.entities) > 1:
             h = utils.sanitize_parse_mode(
                 'html').unparse(t, m.entities)  # HTML
-            for theme in bio_attack_themes:
-                trying_theme_index = bio_attack_themes.index(theme)
-                logger.debug(f'trying theme {trying_theme_index}...')
-                r = re.findall(theme[0], h)
-                if r:
-                    logger.debug(f'found theme {trying_theme_index}')
-                    break
+            r, bio_attack_theme = find_infect_theme(h)
             if r == []:
                 logger.warning(
                     'theme not found or lost part of message, showing original message: ' + m.text)
@@ -116,12 +132,11 @@ async def eb(client, c, conn, con, d, get_id, my_id, message_q):
                 u2id = await get_id(u2url)
                 bio_excludes = [x[0] for x in c.execute(
                     'select user_id from avocado_exclude').fetchall()]
-                # print(f'{u1url} [@{u1id}] подверг(ла) {u2url} [@{u2id}]')#показать
                 when = int(datetime.timestamp(m.date))
-                days = int(re.findall(bio_attack_themes[trying_theme_index][2], t)[
+                days = int(re.findall(bio_attack_theme[2], t)[
                     0].replace(' ', ''))
                 experience = re.findall(
-                    bio_attack_themes[trying_theme_index][1], t)[0].strip()
+                    bio_attack_theme[1], t)[0].strip()
                 if ',' in experience:
                     experience = re.sub(r',', r'.', experience)
                 if 'k' in experience:
@@ -130,7 +145,7 @@ async def eb(client, c, conn, con, d, get_id, my_id, message_q):
                 else:
                     exp_int = int(experience)
                 pathogen_remaining = int(re.findall(
-                    bio_attack_themes[trying_theme_index][3], t)[0])
+                    bio_attack_theme[3], t)[0])
                 if pathogen_remaining <= states.auto_bioeb_pathogen_threshold and u1id == my_id:
                     states.auto_bioeb_sleep_interval = states.auto_bioeb_max_interval
                     logger.warning(