Move theme finder into a separate function

This commit is contained in:
Your Name 2024-12-19 21:52:26 +03:00
parent 722a3d95af
commit 68252d6102

View file

@ -10,6 +10,88 @@ import random
import re
# NOTE: the theme table keeps growing; ideas for improvement are welcome.
# Do not merge everything into one huge alternation (like|that|and|so|on):
# it makes the re.findall() result impossible to work with.

# Opening <a> tag whose href (captured) is either a
# tg://openmessage?user_id=N link or a https://t.me/<name> link.
_PROFILE_LINK = r'<a href="(tg://openmessage\?user_id=\d+|https://t\.me/\w+)">'

# Sub-patterns shared by several themes.
_DEFAULT_BIOEXPR_THEME = r"Прибыль: ([0-9\.\,k]+)"
_DEFAULT_INFECTED_DAYS_THEME = r' на ([0-9\ ]+) д.*'
_DEFAULT_PATHOGEN_REMAINING_THEME = r'Осталось: ([0-9\ ]+)'
_UA_BIOEXPR_THEME = r"([0-9\.\,k]+) біо-ресурса"
_EN_BIOEXPR_THEME = r"([0-9\.\,k]+) pcs\."
_EN_INFECTED_DAYS_THEME = r' for ([0-9\ ]+) d.*'
_EN_PATHOGEN_REMAINING_THEME = r'Remaining: ([0-9\ ]+)'


def _attack_pattern(phrase):
    """Build the 'infected' regex: first link, *phrase*, then second link.

    *phrase* is inserted verbatim right after the first link's closing
    ``</a>``, so it must carry its own leading space (or regex) if needed.
    """
    return _PROFILE_LINK + r'.*</a>' + phrase + r'.+' + _PROFILE_LINK


# Order of the fields inside every theme:
# ('infected', 'bio_expr', 'infected days', 'pathogen remaining')
# Themes are tried top to bottom and the first one that matches wins.
_BIO_ATTACK_THEMES = (
    # UA theme
    (_attack_pattern(' йобнув'),
     _UA_BIOEXPR_THEME,
     _DEFAULT_INFECTED_DAYS_THEME,
     _DEFAULT_PATHOGEN_REMAINING_THEME),
    # RU theme
    (_attack_pattern(' подверг'),
     _DEFAULT_BIOEXPR_THEME,
     _DEFAULT_INFECTED_DAYS_THEME,
     _DEFAULT_PATHOGEN_REMAINING_THEME),
    # EN theme
    (_attack_pattern(' infected'),
     _EN_BIOEXPR_THEME,
     _EN_INFECTED_DAYS_THEME,
     _EN_PATHOGEN_REMAINING_THEME),
    # AZ theme
    (_attack_pattern(' сикди'),
     r"верир: ([0-9\.\,k]+)",
     _DEFAULT_INFECTED_DAYS_THEME,
     _DEFAULT_PATHOGEN_REMAINING_THEME),
    # "ПК гик" theme
    (_attack_pattern(' насрал'),
     r"потеряет: ([0-9\.\,k]+)",
     _DEFAULT_INFECTED_DAYS_THEME,
     _DEFAULT_PATHOGEN_REMAINING_THEME),
    # "Новогодняя" theme
    # NOTE: the RU theme above already matches this text (its phrase is a
    # prefix of this one); harmless because the other three fields are equal.
    (_attack_pattern(' подверг заморозке'),
     _DEFAULT_BIOEXPR_THEME,
     _DEFAULT_INFECTED_DAYS_THEME,
     _DEFAULT_PATHOGEN_REMAINING_THEME),
    # "Сексуальная индустрия" theme (arbitrary text may precede the verb)
    (_attack_pattern(r'.+выебал'),
     r"кончила ([0-9\.\,k]+)",
     r' ещё ([0-9\ ]+) д.*',
     _DEFAULT_PATHOGEN_REMAINING_THEME),
    # "Аферисты в сетях" theme
    (_attack_pattern(' атаковал'),
     r"приносит: ([0-9\.\,k]+)",
     _DEFAULT_INFECTED_DAYS_THEME,
     _DEFAULT_PATHOGEN_REMAINING_THEME),
    # UA theme [via trust]
    (_attack_pattern(' за допомогою довіреності зазнала зараження'),
     _UA_BIOEXPR_THEME,
     _DEFAULT_INFECTED_DAYS_THEME,
     _DEFAULT_PATHOGEN_REMAINING_THEME),
    # RU theme [via trust]
    (_attack_pattern(' при помощи доверенности подвергла заражению'),
     _DEFAULT_BIOEXPR_THEME,
     _DEFAULT_INFECTED_DAYS_THEME,
     _DEFAULT_PATHOGEN_REMAINING_THEME),
    # EN theme [via trust]
    (_attack_pattern(' by authorization infected'),
     _EN_BIOEXPR_THEME,
     _EN_INFECTED_DAYS_THEME,
     _EN_PATHOGEN_REMAINING_THEME),
    # Unknown theme name [via trust]
    (_attack_pattern(' при помощи анонимуса атаковала'),
     r'приносит: ([0-9\.\,k]+)',
     _DEFAULT_INFECTED_DAYS_THEME,
     _DEFAULT_PATHOGEN_REMAINING_THEME),
)


def find_infect_theme(text):
    """Find the first attack-message theme whose 'infected' regex matches.

    Args:
        text: HTML text of the message to inspect.

    Returns:
        A ``(matches, theme)`` pair. ``matches`` is the ``re.findall`` result
        of the winning theme's first pattern: a list of ``(first_href,
        second_href)`` tuples. ``theme`` is the matching 4-tuple of regex
        strings ``(infected, bio_expr, infected_days, pathogen_remaining)``.
        When no theme matches the result is ``([], None)``.
    """
    # Start from an empty list so the fall-through return is always defined.
    matches = []
    for theme_index, theme in enumerate(_BIO_ATTACK_THEMES):
        logger.debug(f'trying theme {theme_index}...')
        matches = re.findall(theme[0], text)
        if matches:
            logger.debug(f'found theme {theme_index}')
            return matches, theme
    return matches, None
async def eb(client, c, conn, con, d, get_id, my_id, message_q):
@client.on(events.NewMessage(
pattern='.*йобнув.*|.*подверг(ла)?.*|.*infected.*|.*сикди.*|.*насрал.*|.*выебал.*|.*за допомогою довіреності.*|.*by authorization infected.*|.*при помощи анонимуса атаковала.*',
@ -24,87 +106,10 @@ async def eb(client, c, conn, con, d, get_id, my_id, message_q):
logger.debug(f"in chat '{chat_name}'")
states.stats_most_infect_spam_chats[chat_name] += 1
t = m.raw_text
# NOTE: theme hell... any ideas for improvment required
# but not use huge regular expression like|that|fuckin|way|a|aaaa|aaaaaaaa
# because it makes re.findall like mess...
default_bioexpr_theme = r"Прибыль: ([0-9\.\,k]+)"
default_infected_days_theme = r' на ([0-9\ ]+) д.*'
default_pathogen_remaining_theme = r'Осталось: ([0-9\ ]+)'
bio_attack_themes = ( # I guess if too many themes it will be slow, but acceptable, because python slow as is.
# current order in theme:
# ('infected', 'bio_expr', 'infected days', 'pathogen remaining')
# UA theme
(r'<a href="(tg://openmessage\?user_id=\d+|https://t\.me/\w+)">.*</a> йобнув.+<a href="(tg://openmessage\?user_id=\d+|https://t\.me/\w+)">',
r"([0-9\.\,k]+) біо-ресурса",
default_infected_days_theme,
default_pathogen_remaining_theme),
# RU theme
(r'<a href="(tg://openmessage\?user_id=\d+|https://t\.me/\w+)">.*</a> подверг.+<a href="(tg://openmessage\?user_id=\d+|https://t\.me/\w+)">',
default_bioexpr_theme,
default_infected_days_theme,
default_pathogen_remaining_theme),
# EN theme
(r'<a href="(tg://openmessage\?user_id=\d+|https://t\.me/\w+)">.*</a> infected.+<a href="(tg://openmessage\?user_id=\d+|https://t\.me/\w+)">',
r"([0-9\.\,k]+) pcs\.",
r' for ([0-9\ ]+) d.*',
r'Remaining: ([0-9\ ]+)'),
# AZ theme
(r'<a href="(tg://openmessage\?user_id=\d+|https://t\.me/\w+)">.*</a> сикди.+<a href="(tg://openmessage\?user_id=\d+|https://t\.me/\w+)">',
r"верир: ([0-9\.\,k]+)",
default_infected_days_theme,
default_pathogen_remaining_theme),
# "ПК гик" theme
(r'<a href="(tg://openmessage\?user_id=\d+|https://t\.me/\w+)">.*</a> насрал.+<a href="(tg://openmessage\?user_id=\d+|https://t\.me/\w+)">',
r"потеряет: ([0-9\.\,k]+)",
default_infected_days_theme,
default_pathogen_remaining_theme),
# "Новогодняя" theme
(r'<a href="(tg://openmessage\?user_id=\d+|https://t\.me/\w+)">.*</a> подверг заморозке.+<a href="(tg://openmessage\?user_id=\d+|https://t\.me/\w+)">',
default_bioexpr_theme,
default_infected_days_theme,
default_pathogen_remaining_theme),
# "Сексуальная индустрия" theme
(r'<a href="(tg://openmessage\?user_id=\d+|https://t\.me/\w+)">.*</a>.+выебал.+<a href="(tg://openmessage\?user_id=\d+|https://t\.me/\w+)">',
r"кончила ([0-9\.\,k]+)",
r' ещё ([0-9\ ]+) д.*',
default_pathogen_remaining_theme),
# "Аферисты в сетях" theme
(r'<a href="(tg://openmessage\?user_id=\d+|https://t\.me/\w+)">.*</a> атаковал.+<a href="(tg://openmessage\?user_id=\d+|https://t\.me/\w+)">',
r"приносит: ([0-9\.\,k]+)",
default_infected_days_theme,
default_pathogen_remaining_theme),
# UA theme [via trust]
(r'<a href="(tg://openmessage\?user_id=\d+|https://t\.me/\w+)">.*</a> за допомогою довіреності зазнала зараження.+<a href="(tg://openmessage\?user_id=\d+|https://t\.me/\w+)">',
r"([0-9\.\,k]+) біо-ресурса",
default_infected_days_theme,
default_pathogen_remaining_theme),
# RU theme [via trust]
(r'<a href="(tg://openmessage\?user_id=\d+|https://t\.me/\w+)">.*</a> при помощи доверенности подвергла заражению.+<a href="(tg://openmessage\?user_id=\d+|https://t\.me/\w+)">',
default_bioexpr_theme,
default_infected_days_theme,
default_pathogen_remaining_theme),
# EN theme [via trust]
(r'<a href="(tg://openmessage\?user_id=\d+|https://t\.me/\w+)">.*</a> by authorization infected.+<a href="(tg://openmessage\?user_id=\d+|https://t\.me/\w+)">',
r"([0-9\.\,k]+) pcs\.",
r' for ([0-9\ ]+) d.*',
r'Remaining: ([0-9\ ]+)'),
# idk what is theme [via trust]
(r'<a href="(tg://openmessage\?user_id=\d+|https://t\.me/\w+)">.*</a> при помощи анонимуса атаковала.+<a href="(tg://openmessage\?user_id=\d+|https://t\.me/\w+)">',
r'приносит: ([0-9\.\,k]+)',
default_infected_days_theme,
default_pathogen_remaining_theme),
)
if len(m.entities) > 1:
h = utils.sanitize_parse_mode(
'html').unparse(t, m.entities) # HTML
for theme in bio_attack_themes:
trying_theme_index = bio_attack_themes.index(theme)
logger.debug(f'trying theme {trying_theme_index}...')
r = re.findall(theme[0], h)
if r:
logger.debug(f'found theme {trying_theme_index}')
break
r, bio_attack_theme = find_infect_theme(h)
if r == []:
logger.warning(
'theme not found or lost part of message, showing original message: ' + m.text)
@ -116,12 +121,11 @@ async def eb(client, c, conn, con, d, get_id, my_id, message_q):
u2id = await get_id(u2url)
bio_excludes = [x[0] for x in c.execute(
'select user_id from avocado_exclude').fetchall()]
# print(f'{u1url} [@{u1id}] подверг(ла) {u2url} [@{u2id}]')#показать
when = int(datetime.timestamp(m.date))
days = int(re.findall(bio_attack_themes[trying_theme_index][2], t)[
days = int(re.findall(bio_attack_theme[2], t)[
0].replace(' ', ''))
experience = re.findall(
bio_attack_themes[trying_theme_index][1], t)[0].strip()
bio_attack_theme[1], t)[0].strip()
if ',' in experience:
experience = re.sub(r',', r'.', experience)
if 'k' in experience:
@ -130,7 +134,7 @@ async def eb(client, c, conn, con, d, get_id, my_id, message_q):
else:
exp_int = int(experience)
pathogen_remaining = int(re.findall(
bio_attack_themes[trying_theme_index][3], t)[0])
bio_attack_theme[3], t)[0])
if pathogen_remaining <= states.auto_bioeb_pathogen_threshold and u1id == my_id:
states.auto_bioeb_sleep_interval = states.auto_bioeb_max_interval
logger.warning(