Large initial commit

2024-12-22 15:03:34 +02:00 · 2022-08-31 13:20:49 +03:00 · 2022-08-31 13:20:49 +03:00 · 838c25b639
commit 838c25b639
12 changed files with 585 additions and 0 deletions
--- a/.gitignore
+++ b/.gitignore
@ -0,0 +1,6 @@
+**/__pycache__
+.auth
+.auth_kinopoisk
+.app_sessions
+*.sqlite
+
--- a/auth_helper.sh
+++ b/auth_helper.sh
@ -0,0 +1,58 @@
+#!/bin/sh
+# Open the given URL(s) in the text-mode browser used for the OAuth step.
+default_auth_browser()
+{
+	# Quote "$@" so each argument is passed verbatim: unquoted, the URL is
+	# subject to word splitting and its `?` is treated as a glob character.
+	w3m "$@"
+}
+
+# Directory holding the cached per-instance app registrations.
+mkdir -m 711 -p .app_sessions
+
+
+echo 'Input instance (example.example)'
+# -r keeps backslashes in the typed value literal.
+read -r instance
+
+echo 'Input client/app name'
+read -r client_name
+
+# Base URL of the instance REST API.
+export instance_point="https://$instance/api/v1"
+
+# Register this client with the instance once and cache the JSON response
+# (it holds client_id / client_secret) in .app_sessions/$instance.
+auth_api_create_client()
+{
+	if [ ! -e ".app_sessions/$instance" ]; then
+		# curl's --create-file-mode only affects files created remotely
+		# (SFTP/SCP/FILE), so the local file is restricted via umask instead.
+		# The subshell keeps the umask change from leaking to the caller.
+		(
+			umask 077
+			curl -s --compressed --url "$instance_point/apps" \
+			--data-urlencode "client_name=$client_name" \
+			--data-urlencode 'redirect_uris=urn:ietf:wg:oauth:2.0:oob' \
+			--data-urlencode 'scopes=read write follow' \
+			--output ".app_sessions/$instance"
+		)
+	fi
+}
+
+# Make sure the app is registered, open the authorization page in the
+# browser and read the code the user copies back into $pass.
+auth_api_get_code()
+{
+	auth_api_create_client
+	client_id=$(jq -r '.client_id' ".app_sessions/$instance")
+	default_auth_browser "https://$instance/oauth/authorize?client_id=$client_id&response_type=code&redirect_uri=urn:ietf:wg:oauth:2.0:oob&scope=read+write+follow"
+	echo 'Input token-code:'
+	# -r so the pasted code is stored exactly as typed.
+	read -r pass
+}
+
+# Exchange the authorization code for an access token and store it in
+# $basedir/.auth (the current directory when basedir is unset).
+# Returns 1 without touching the file when no token was obtained.
+auth_api_get_token()
+{
+	auth_api_get_code
+	clear
+	client_id=$(jq -r '.client_id' ".app_sessions/$instance")
+	client_secret=$(jq -r '.client_secret' ".app_sessions/$instance")
+	token=$(curl -s --compressed --url "https://$instance/oauth/token" \
+	--data-urlencode 'grant_type=authorization_code' \
+	--data-urlencode "client_id=$client_id" \
+	--data-urlencode "client_secret=$client_secret" \
+	--data-urlencode "redirect_uri=urn:ietf:wg:oauth:2.0:oob" \
+	--data-urlencode 'scope=read write follow' \
+	--data-urlencode "code=$pass" | jq -r '.access_token')
+	# jq -r prints the string "null" when the field is missing.
+	if [ -z "$token" ] || [ "$token" = 'null' ]; then
+		echo 'Failed to obtain access token' >&2
+		return 1
+	fi
+	# basedir is never set by this script; without a default the token
+	# would be written to /.auth.
+	auth_file="${basedir:-.}/.auth"
+	# Create the file empty and lock it down before the token is written.
+	: > "$auth_file"
+	chmod 600 "$auth_file"
+	echo "$token" > "$auth_file"
+}
+
+auth_api_get_token
+
--- a/config.py
+++ b/config.py
@ -0,0 +1,9 @@
+main_admin_bot = '' # Account name of the bot administrator
+bot_acct = '' # Account name of the bot on the instance
+instance = '' # Instance on which the bot runs
+limit_movies_per_user = 2 # Maximum number of movies per user
+limit_all_movies_poll = 20 # Maximum total number of movies that can be added
+hour_poll_posting=16 # Hour at which the poll post is created
+
+logger_default_level=10 # Logging level: 10 - DEBUG, 20 - INFO, 30 - WARN
+
--- a/fmn_bot.py
+++ b/fmn_bot.py
@ -0,0 +1,17 @@
+from src import listener_context, listener_mention, imdb_datasets_worker
+from config import logger_default_level
+import logging
+
+
+
+def main():
+    """Configure logging and run the suggestion-thread listener."""
+    # imdb_datasets_worker.convert_datasets_to_db()
+
+    logging.basicConfig(level=logger_default_level)
+
+    #listener_mention.run_scan_notif() # Listen for mentions in the background
+
+    listener_context.scan_context_thread() # Watch the thread for new movie suggestions
+
+if __name__ == '__main__':
+    main()
--- a/requirements.txt
+++ b/requirements.txt
@ -0,0 +1,3 @@
+requests
+python-dateutil
+
--- a/src/fedi_api.py
+++ b/src/fedi_api.py
@ -0,0 +1,56 @@
+import json
+import requests
+from config import instance
+
+# Base URL of the instance REST API.
+instance_point = f"https://{instance}/api/v1"
+
+# Read the access token from .auth, dropping newlines.
+with open(".auth", mode='rt') as auth:
+    tkn = auth.read().replace('\n', '')
+
+# Authorization header passed to every request made in this module.
+headers= {
+        "Authorization": "Bearer " + tkn
+}
+
+def get_notifications():
+    """Fetch up to 15 notifications of type ``mention`` and return the decoded JSON."""
+    query = {"limit": 15, "type": "mention"}
+    response = requests.get(instance_point + "/notifications", params=query, headers=headers)
+    return response.json()
+
+
+def mark_as_read_notification(id_notification):
+    """Dismiss the notification with the given id and return the decoded JSON reply."""
+    url = instance_point + f"/notifications/{id_notification}/dismiss"
+    return requests.post(url, headers=headers).json()
+
+
+def get_status_context(status_id):
+    """Return the decoded JSON context of the status ``status_id``."""
+    url = instance_point + f"/statuses/{status_id}/context"
+    return requests.get(url, headers=headers).json()
+
+
+def get_status(status_id):
+    """Return the decoded JSON of the status ``status_id``."""
+    url = instance_point + f"/statuses/{status_id}"
+    return requests.get(url, headers=headers).json()
+
+
+def post_status(text, reply_to_status_id=None, poll_options=None, poll_expires=345600):
+    """Publish an unlisted plain-text status, optionally as a reply and/or with a poll.
+
+    Args:
+        text: Body of the status.
+        reply_to_status_id: Id of the status to reply to, or None.
+        poll_options: List of poll option strings, or None for no poll.
+        poll_expires: Poll lifetime in seconds (default 345600, i.e. 4 days).
+
+    Returns:
+        The decoded JSON body of the response.
+    """
+    poll = None
+    if poll_options is not None:
+        # Polls are always created as multiple-choice.
+        poll = {
+            "options": poll_options,
+            "expires_in": poll_expires,
+            "multiple": True
+        }
+    params = {
+        "status": text,
+        "in_reply_to_id": reply_to_status_id,
+        "visibility": "unlisted",
+        "content_type": "text/plain",
+        "poll": poll
+    }
+    r = requests.post(instance_point + "/statuses", json=params, headers=headers)
+    return r.json()
+
--- a/src/fmn_database.py
+++ b/src/fmn_database.py
@ -0,0 +1,87 @@
+import sqlite3
+import logging
+from config import limit_movies_per_user
+
+
+logger = logging.getLogger('fmn_db')
+
+# Module-wide connection and cursor shared by the functions in this module.
+conn = sqlite3.connect("fmn.sqlite")
+c = conn.cursor()
+
+# Movies recorded as watched.
+c.execute(
+    '''CREATE TABLE IF NOT EXISTS watched_movies(original_name VARCHAR (500) DEFAULT NULL, ru_name VARCHAR (500) DEFAULT NULL, year INTEGER DEFAULT NULL)''')
+# Current suggestions: suggesting account, movie names, year and vote count.
+c.execute(
+    '''CREATE TABLE IF NOT EXISTS poll(user_suggested VARCHAR (500) DEFAULT NULL, original_name VARCHAR (500) DEFAULT NULL, ru_name VARCHAR (500) DEFAULT NULL, year INTEGER DEFAULT NULL, votes INTEGER)''')
+
+conn.commit()
+
+
+def mark_as_watched_movie(original_name=None, ru_name=None, year=None):
+    """Insert a movie into the watched_movies table and commit.
+
+    ``year`` is coerced to int; a value that cannot be converted is stored
+    as NULL.
+    """
+    try:
+        year = int(year)
+    except (TypeError, ValueError):
+        # Only conversion failures are expected; anything else should surface.
+        year = None
+    c.execute("INSERT OR IGNORE INTO watched_movies(original_name, ru_name, year) VALUES (?, ?, ?)",
+                (original_name, ru_name, year))
+    conn.commit()
+    logger.info('Watched movie added to db: ' + str((original_name, ru_name, year)))
+
+
+def get_already_watched(original_name=None, ru_name=None, year=None):
+    """Return True if watched_movies holds a row with exactly these values.
+
+    None arguments match NULL columns.
+    """
+    # `IS` compares like `=` but also treats NULL as equal to NULL; with `=`
+    # a movie stored with a NULL name or year could never be found again.
+    row = c.execute(
+        "SELECT 1 FROM watched_movies WHERE original_name IS ? AND ru_name IS ? AND year IS ? LIMIT 1",
+        (original_name, ru_name, year)).fetchone()
+    return row is not None
+
+
+
+def per_user_accepted_movie_count(acct=str):
+    """Return how many poll rows were suggested by the account ``acct``."""
+    rows = c.execute("SELECT user_suggested FROM poll WHERE user_suggested == (?)", (acct,))
+    return len(rows.fetchall())
+
+
+def add_movie_to_poll(acct=str, original_name=None, ru_name=None, year=None):
+    """Add a suggestion to the poll table unless the user hit the per-user limit.
+
+    Returns False when ``acct`` already has ``limit_movies_per_user`` or more
+    rows in the poll; otherwise issues the insert with zero votes, commits
+    and returns True.
+    """
+    if per_user_accepted_movie_count(acct) >= limit_movies_per_user:
+        return False
+
+    entry = (acct, original_name, ru_name, year)
+    c.execute("INSERT OR IGNORE INTO poll(user_suggested, original_name, ru_name, year, votes) VALUES (?, ?, ?, ?, ?)",
+              entry + (0,))
+    conn.commit()
+    logger.info('Add option to poll: ' + str(entry))
+    return True
+
+
+def get_movies_for_poll():
+    """Return all rows of the poll table, ordered by year."""
+    return c.execute("SELECT * FROM poll ORDER BY year").fetchall()
+
+
+def write_votes(votes_list_tuples=list):
+    '''Store the poll results.
+
+    ``votes_list_tuples`` holds one vote count per poll row, in the order
+    returned by get_movies_for_poll(). The table is rewritten (and committed)
+    only when a count is available for every row; otherwise it is left
+    untouched and a warning is logged.
+    '''
+    original = get_movies_for_poll()
+    new = []
+    for index, row in enumerate(original):
+        try:
+            votes = votes_list_tuples[index]
+        except (IndexError, TypeError):
+            # Fewer counts than rows (or no usable sequence): stop collecting.
+            break
+        new.append((row[0], row[1], row[2], row[3], votes))
+    if len(new) != len(original):
+        logger.warning('Votes not stored: got %d vote counts for %d poll rows', len(new), len(original))
+        return
+    c.execute("DELETE FROM poll")
+    c.executemany("INSERT OR IGNORE INTO poll(user_suggested, original_name, ru_name, year, votes) VALUES (?, ?, ?, ?, ?)", new)
+    # Persist the rewrite; without a commit it stays in the open transaction.
+    conn.commit()
+
+
+def read_votes():
+    """Return all rows of the poll table, highest vote count first."""
+    return c.execute("SELECT * FROM poll ORDER BY votes DESC").fetchall()
+
+
+def rewrite_db(winned_movies=list):
+    '''Replace the contents of the poll table with the winning movies (used for a tie breaker).'''
+    insert_sql = "INSERT OR IGNORE INTO poll(user_suggested, original_name, ru_name, year, votes) VALUES (?, ?, ?, ?, ?)"
+    c.execute("DELETE FROM poll")
+    c.executemany(insert_sql, winned_movies)
+    conn.commit()
--- a/src/fmn_poll.py
+++ b/src/fmn_poll.py
@ -0,0 +1,68 @@
+from collections import Counter
+from src.fedi_api import get_status, post_status
+from src.fmn_database import get_movies_for_poll, write_votes, read_votes, mark_as_watched_movie, get_already_watched, rewrite_db
+import logging
+
+logger = logging.getLogger('fmn_poll')
+
+# Default status text used by create_poll_movies().
+text_create_poll = '''Друзья, голосование за следующий Fediverse Movie Night объявляю открытым!
+
+Ставки сделаны, ставок больше нет
+'''.replace('\t', '')
+
+
+def create_poll_movies(text=text_create_poll, poll_expires=345600):
+    """Post a poll built from the stored suggestions and remember its status id.
+
+    Each poll row becomes one option labelled with the movie name(s), year
+    and suggesting account. The id of the posted status is written to the
+    ``poll_status_id`` file and returned.
+    """
+    options = []
+    for row in get_movies_for_poll():
+        acct, orig_name, ru_name, year = row[0], row[1], row[2], row[3]
+        if orig_name is None:
+            label = f"{ru_name}, {year} ({acct})"
+        elif ru_name is None:
+            label = f"{orig_name}, {year} ({acct})"
+        else:
+            label = f"{ru_name} / {orig_name}, {year} ({acct})"
+        options.append(label)
+
+    posted = post_status(text, None, options, poll_expires=poll_expires)
+    poll_status_id = posted['id']
+    logger.info('Голосовалка создана')
+    with open('poll_status_id', mode='wt') as id_file:
+        id_file.write(poll_status_id)
+    return poll_status_id
+
+
+def get_winner_movie(poll_status_id=str):
+    '''Tally the poll, then mark the winner as watched or start a tie breaker.
+
+    Vote counts are read from the poll attached to status ``poll_status_id``
+    and stored via write_votes(). If several movies share the top count, the
+    poll table is reduced to them and a tie-breaker poll is created; otherwise
+    the single winner is recorded as watched.
+    '''
+    poll = get_status(poll_status_id)['poll']
+    votes_counters = [option['votes_count'] for option in poll['options']]
+
+    write_votes(votes_counters)
+    voted_movies = read_votes()
+    if not voted_movies:
+        # Nothing in the poll table: there is no winner to pick.
+        logger.warning('No movies in the poll table, winner not selected')
+        return
+
+    # Rows come sorted by votes descending, so the first one holds the maximum.
+    max_vote = voted_movies[0][4]
+    winned_movies = [movie for movie in voted_movies if movie[4] == max_vote]
+
+    if len(winned_movies) > 1:
+        logger.warning('Будет создан tie breaker')
+        rewrite_db(winned_movies)
+        create_tie_breaker()
+    else:
+        movie = winned_movies[0]
+        logger.warning("Победил " + str(movie))
+        mark_as_watched_movie(movie[1], movie[2], movie[3])
+
+
+def create_tie_breaker():
+    """Post an 8-hour tie-breaker poll, wait for it to end and pick the winner."""
+    # `time` is not imported at module level, so import it where it is used.
+    import time
+
+    duration = 8*60*60  # seconds
+    tie_poll = create_poll_movies("TIE BREAKER!!!", duration)
+    time.sleep(duration)
+    get_winner_movie(tie_poll)
+
--- a/src/imdb_datasets_worker.py
+++ b/src/imdb_datasets_worker.py
@ -0,0 +1,80 @@
+import sqlite3
+import gzip
+
+# Module-wide connection and cursor for the titles database.
+conn = sqlite3.connect("imdb_titles.sqlite")
+c = conn.cursor()
+
+# One row per title, with a unique integer tt_id.
+c.execute(
+    '''CREATE TABLE IF NOT EXISTS titles(tt_id INTEGER UNIQUE, type VARCHAR (50), original_name VARCHAR (500) DEFAULT NULL, ru_name VARCHAR (500) DEFAULT NULL, year INTEGER DEFAULT NULL)''')
+# NOTE(review): synchronous = OFF is presumably there to speed up the bulk import — confirm.
+c.execute("PRAGMA synchronous = OFF")
+c.execute("PRAGMA optimize")
+conn.commit()
+
+
+def convert_tsv_to_db(title_basics_tsv):
+    '''Load the gzipped main TSV dataset into the titles table.
+
+    Every row is stored with its id and type; the original name and year are
+    kept only for "movie" and "video" titles. Rows that cannot be parsed are
+    printed and skipped. Processing the full dataset is slow.
+    '''
+    # Read as UTF-8 explicitly instead of relying on the platform default.
+    with gzip.open(title_basics_tsv, mode='rt', encoding='utf-8') as file:
+        for line in file:
+            line = line.split("\t")
+            try:
+                tt_id = int(line[0].split("tt")[1])
+                tt_type = line[1]
+                original_name = line[3]
+                ru_name = None
+                year = line[5]
+
+                if tt_type not in ("movie", "video"):
+                    original_name = None
+                    year = "\\N"
+                else:
+                    print(tt_id, tt_type, original_name, ru_name, year)
+
+                # "\N" is the dataset's marker for a missing value.
+                if year == "\\N":
+                    year = None
+                else:
+                    year = int(year)
+                c.execute("INSERT OR REPLACE INTO titles(tt_id, type, original_name, ru_name, year) VALUES (?, ?, ?, ?, ?)",
+                (tt_id, tt_type, original_name, ru_name, year))
+            except (IndexError, ValueError, sqlite3.Error) as E:
+                # Malformed rows (the header line included) are reported and skipped.
+                print(E)
+        # One commit for the whole import instead of one per input line.
+        conn.commit()
+
+def extract_ru_locale_from_tsv(title_akas_tsv):
+    '''Fill ru_name in the titles table from the gzipped localized-titles dataset.
+
+    Only rows whose region is "RU" are used, and only titles already stored
+    with type "movie" or "video" are updated. Rows that cannot be parsed are
+    printed and skipped.
+    '''
+    # Read as UTF-8 explicitly instead of relying on the platform default.
+    with gzip.open(title_akas_tsv, mode='rt', encoding='utf-8') as file:
+        for line in file:
+            line = line.split("\t")
+            try:
+                tt_region = line[3]
+                if tt_region != "RU":
+                    continue
+
+                tt_id = int(line[0].split("tt")[1])
+                row = c.execute("SELECT type FROM titles WHERE tt_id = ?", (tt_id,)).fetchone()
+                # A title missing from the table has nothing to update.
+                if row is None:
+                    continue
+                tt_type = row[0]
+                if tt_type not in ("movie", "video"):
+                    continue
+                ru_name = line[2]
+                print(ru_name, tt_type)
+                c.execute("UPDATE titles SET ru_name = ? WHERE tt_id = ?", (ru_name, tt_id))
+
+            except (IndexError, ValueError, sqlite3.Error) as E:
+                print(E)
+        conn.commit()
+
+def convert_datasets_to_db():
+    """Import title.basics.tsv.gz, then add the RU names from title.akas.tsv.gz."""
+    print("Converting tsv dataset to sqlite...")
+    convert_tsv_to_db("title.basics.tsv.gz")
+    print("Unpack ru locale...")
+    extract_ru_locale_from_tsv("title.akas.tsv.gz")
+
+
+def get_title_by_id(films_ids=list):
+    """Look up rows of the titles table by numeric title id.
+
+    Args:
+        films_ids: Iterable of ids (digit strings or ints).
+
+    Returns:
+        A list with one entry per id, in input order: the full titles row,
+        or None when the id is not in the table.
+    """
+    tt_list = []
+    for i in films_ids:
+        # Bind the id as a parameter instead of formatting it into the SQL.
+        tt_film = c.execute("SELECT * FROM titles WHERE tt_id = ?", (int(i),)).fetchone()
+        tt_list.append(tt_film)
+    return tt_list
+
--- a/src/kinopoisk_api.py
+++ b/src/kinopoisk_api.py
@ -0,0 +1,28 @@
+import requests
+import time
+
+
+kinopoisk_api_url='https://kinopoiskapiunofficial.tech/api/v2.2'
+# The unofficial Kinopoisk API is used; the default limit is 500 requests.
+
+# API key is read from .auth_kinopoisk, with newlines removed.
+with open(".auth_kinopoisk", mode='rt') as key_file:
+    token = key_file.read().replace('\n', '')
+
+
+def get_kinopoisk_movie(film=list):
+    """Resolve film ids through the Kinopoisk API.
+
+    For every id in ``film`` the result list gets ``(type, year,
+    nameOriginal, nameRu)`` on HTTP 200 or ``("404",)`` on HTTP 400/404.
+    Ids answered with any other status are left out of the result.
+    """
+    request_headers = {"X-API-KEY": token}
+    resolved = []
+    for film_id in film:
+        response = requests.get(kinopoisk_api_url + "/films/" + film_id, headers=request_headers)
+        code = response.status_code
+        if code != 200 and code not in (400, 404):
+            # Unexpected status: skip this id (and the pause below).
+            continue
+        if code == 200:
+            data = response.json()
+            resolved.append((data['type'], data['year'], data['nameOriginal'], data['nameRu']))
+        else:
+            resolved.append(("404",))
+        # Short pause between requests.
+        time.sleep(0.2)
+    return resolved
+
--- a/src/listener_context.py
+++ b/src/listener_context.py
@ -0,0 +1,131 @@
+import time
+from datetime import datetime
+from dateutil.parser import parse as dateutilparse
+from dateutil.relativedelta import relativedelta, TU
+
+import re
+import logging
+from config import hour_poll_posting, bot_acct
+from src.fedi_api import get_status_context, get_status, post_status
+from src.kinopoisk_api import get_kinopoisk_movie
+from src.imdb_datasets_worker import get_title_by_id
+from src.fmn_database import add_movie_to_poll, get_already_watched
+from src.fmn_poll import create_poll_movies
+
+logger = logging.getLogger('thread_listener')
+
+def parse_links(text=str):
+    """Extract Kinopoisk film ids from ``text`` (case-insensitive).
+
+    Returns the list of digit strings that follow ``film/`` (possibly empty)
+    when the text contains ``kinopoisk.ru/``, otherwise None.
+    """
+    lowered = text.lower()
+    if not re.search(r"kinopoisk\.ru/", lowered, flags=re.MULTILINE):
+        return None
+    return re.findall(r"film/(\d{1,})", lowered)
+
+
+def parse_links_imdb(text=str):
+    """Extract IMDb title ids from ``text`` (case-insensitive).
+
+    Returns the list of digit strings that follow ``tt`` (possibly empty)
+    when the text contains ``imdb.com/``, otherwise None.
+    """
+    lowered = text.lower()
+    if not re.search(r"imdb\.com/", lowered, flags=re.MULTILINE):
+        return None
+    return re.findall(r"tt(\d{1,})", lowered)
+
+        
+def scan_context_thread():
+    """Poll the suggestion thread forever, accept movie links and open the poll.
+
+    The thread id is read from the ``last_thread_id`` file (retried every
+    second until it can be read and the status fetched). The collection
+    deadline is derived from the thread's creation time with
+    ``relativedelta(hour=hour_poll_posting, minute=0, second=0, weekday=TU(1))``.
+    Every 30 seconds the thread's descendants are scanned: replies with
+    Kinopoisk or IMDb links that the bot has not answered yet are resolved,
+    validated, stored and answered. Once the deadline has passed the poll is
+    created a single time and late suggestions are rejected.
+    """
+    status_id = None
+    poll_created = False
+    while True:
+        time_now = time.time()
+        while status_id is None:
+            try:
+                with open('last_thread_id', mode='rt') as file:
+                    status_id = file.read().replace('\n', '')
+                    thread_created_at = get_status(status_id)['created_at']
+                    thread_created_at = dateutilparse(thread_created_at)
+                    delta = relativedelta(hour=hour_poll_posting, minute=0, second=0, weekday=TU(1))
+                    stop_thread_scan = thread_created_at + delta
+                    logger.info('Сбор будет завершен в ' + stop_thread_scan.strftime('%c'))
+                    # Convert the deadline to a timestamp comparable with time.time().
+                    stop_thread_scan = time.mktime(time.struct_time(stop_thread_scan.timetuple()))
+            except Exception as E:
+                # Any failure (missing file, network, parsing) means "try again".
+                logger.exception(E)
+                status_id = None
+            time.sleep(1)
+        if int(time_now) >= int(stop_thread_scan):
+            logger.debug('Сбор завершён, сканирование треда на опоздавших')
+            if poll_created is False:
+                create_poll_movies()
+                poll_created = True
+        else:
+            endings = stop_thread_scan - time_now
+            logger.debug(f'Осталось до закрытия сбора: {endings}')
+        descendants = get_status_context(status_id)['descendants']
+        # Ids of the statuses the bot has already replied to.
+        replyed = {
+            status['in_reply_to_id']
+            for status in descendants
+            if status['account']['acct'] == bot_acct
+        }
+
+        for status in descendants:
+            id_st = status['id']
+            in_reply_acct = status['in_reply_to_account_id']
+            in_reply_id = status['in_reply_to_id']
+            acct = status['account']['acct']
+            content = status['pleroma']['content']['text/plain']
+
+            if id_st in replyed: # Skip statuses that were already answered
+                continue
+            parsed_result = parse_links(content)
+            parsed_result_imdb = parse_links_imdb(content)
+
+            if parsed_result is None and parsed_result_imdb is None:
+                continue
+
+            if poll_created is True:
+                post_status('ℹ️ Приём заявок уже окончен', id_st)
+                continue
+
+            # The two sources return tuples with different layouts; remember
+            # where each field lives. Kinopoisk links take precedence.
+            if parsed_result is not None:
+                suggested_movies = get_kinopoisk_movie(parsed_result)
+                index_type = 0
+                index_name = 2
+                index_ru_name = 3
+                index_year = 1
+            elif parsed_result_imdb is not None:
+                suggested_movies = get_title_by_id(parsed_result_imdb)
+                index_type = 1
+                index_name = 2
+                index_ru_name = 3
+                index_year = 4
+
+            message_writer = []
+            for movie in suggested_movies:
+                # get_title_by_id() yields None for ids missing from the
+                # database; treat that like a "404" instead of crashing.
+                if movie is None or movie[index_type] == "404":
+                    message_writer.append("❌ Не найдено.")
+                elif movie[index_type] not in ("movie", "FILM", "video"):
+                    message_writer.append(f"❌ Не принято: Мы принимаем фильмы, если они: полнометражные, художественные, не являются сериалами")
+                else:
+
+                    name = movie[index_name]
+                    name_ru = movie[index_ru_name]
+                    year = movie[index_year]
+                    movie_string = f"{name_ru} / {name}, {year}"
+
+                    if name is None:
+                        movie_string = f"{name_ru}, {year}"
+                    if name_ru is None:
+                        movie_string = f"{name}, {year}"
+
+                    if get_already_watched(name, name_ru, year):
+                        message_writer.append(f"ℹ️Этот фильм уже был на FMN: {movie_string}")
+                        continue
+
+                    if add_movie_to_poll(acct, name, name_ru, year):
+                        message_writer.append(f"✅ Принято: {movie_string}")
+                    else:
+                        post_status("❌ Вы не можете добавить больше 2х фильмов", id_st)
+            if message_writer:
+                post_status('\n'.join(message_writer) + "\nБлагодарим за ваше предложение!", id_st)
+
+            logger.info(str((id_st, in_reply_acct, acct, in_reply_id, content)))
+        time.sleep(30)
+
--- a/src/listener_mention.py
+++ b/src/listener_mention.py
@ -0,0 +1,42 @@
+from src.fedi_api import get_notifications, mark_as_read_notification, post_status
+from config import main_admin_bot, limit_movies_per_user, limit_all_movies_poll, hour_poll_posting
+import threading, time
+
+# Admin account name taken from the config; run_scan_notif() passes it to the scanner thread.
+acct_admin_bot=main_admin_bot
+
+def get_control_mention(acct_admin_bot=str):
+    """Watch mentions forever and start a collection thread on the admin's request.
+
+    Every 30 seconds the notifications are fetched. An unseen mention from
+    ``acct_admin_bot`` that is not a reply gets the rules text as an answer,
+    is dismissed, and its status id is written to ``last_thread_id``.
+    """
+    while True:
+        for notification in get_notifications():
+            if notification['account']['acct'] != acct_admin_bot:
+                continue
+            # Explicit comparison with False: only a definite "not seen" counts.
+            if not (notification['pleroma']['is_seen'] == False):
+                continue
+            if not (notification['status']['in_reply_to_id'] == None):
+                continue
+
+            st_id = notification['status']['id']
+            post_status(start_collect_movies_text(), st_id)
+            time.sleep(0.2)
+            mark_as_read_notification(notification['id'])
+            with open('last_thread_id', 'wt') as thread_file:
+                thread_file.write(st_id)
+        time.sleep(30)
+
+
+def start_collect_movies_text():
+    """Return the announcement text that opens movie submissions.
+
+    Tab characters are removed from the text; the ``$...`` markers it
+    contains are returned as-is, nothing substitutes them here.
+    """
+    return '''
+    Начинаем прием заявок на следующий вечерний киносеанс, запланированный на $дата_следующего_киносеанса в 21:00 по Москве.
+
+Напоминаем правила:
+ - Мы принимаем на просмотр полнометражные художественные фильмы;
+ - Прием варианта осуществляется путем публикации ссылки на этот фильм на IMDB или Кинопоиске в этом треде;
+ - Нам не подходят: сериалы, короткометражные и документальные фильмы;
+ - Максимальное количество вариантов, предложенных одним человеком не должно превышать $ 2;
+ - Всего может быть собрано до $ 20 фильмов;
+ - Заявки принимаются до крайнего срока, после чего будет объявлено голосование по собранным вариантам.
+
+Крайний срок подачи заявки - $дата_дедлайна.
+
+Желаем удачи.
+    '''.replace('\t', '')
+
+def run_scan_notif():
+    """Start get_control_mention() for the admin account in a daemon thread."""
+    threading.Thread(
+        target=get_control_mention,
+        args=(acct_admin_bot,),
+        daemon=True,
+    ).start()
+