reorg to make better use of volumes

Author: jsl12
Date:   2022-01-28 00:22:33 -06:00
parent be3097ee8c
commit e7aa6b062c
11 changed files with 13 additions and 8 deletions

src/kwaylon/__init__.py Normal file

@@ -0,0 +1,3 @@
from .jokes import Joke, GifJoke
from .kwaylon import Kwaylon
from .reactions import ReactionData


@@ -0,0 +1,11 @@
from . import jokes
from .base import Joke, GifJoke


def collect_jokes():
    # Yield an instance of every concrete Joke subclass found in the jokes module,
    # skipping the abstract bases themselves.
    for name in dir(jokes):
        joke = getattr(jokes, name)
        try:
            if issubclass(joke, Joke) and joke not in (Joke, GifJoke):
                yield joke()
        except TypeError:
            # Not a class at all; skip it.
            continue

src/kwaylon/jokes/base.py Normal file

@@ -0,0 +1,22 @@
import re

import nextcord as discord


class Joke:
    @property
    def regex(self) -> re.Pattern:
        raise NotImplementedError

    def scan(self, message: discord.Message) -> re.Match:
        # Returns a Match if the message text triggers this joke, else None.
        return self.regex.search(message.content)

    async def respond(self, message: discord.Message, client: discord.Client, match: re.Match):
        raise NotImplementedError


class GifJoke(Joke):
    # Subclasses set `url` to the GIF that gets posted as the punchline.
    url: str

    async def respond(self, message: discord.Message, client: discord.Client, match: re.Match):
        await message.reply(self.url)
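
For illustration, this is roughly what a concrete joke might look like in the module that collect_jokes() scans; the class name, pattern, and URL below are hypothetical and not part of this commit.

# Hypothetical example only; the real jokes module is not shown in this diff.
import re

from .base import GifJoke


class NiceJoke(GifJoke):
    # Placeholder URL for the reply GIF.
    url = 'https://tenor.com/view/nice-gif'

    @property
    def regex(self) -> re.Pattern:
        # Any message containing a standalone "69" triggers the reply.
        return re.compile(r'\b69\b')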


@@ -0,0 +1,42 @@
import nltk
import requests
from bs4 import BeautifulSoup

# Chunk grammar: a noun phrase is an optional determiner, any adjectives, then a noun.
pattern = 'NP: {<DT>?<JJ>*<NN>}'
cp = nltk.RegexpParser(pattern)


def get_stock_price(symbol: str):
    soup = BeautifulSoup(requests.get(f'https://finance.yahoo.com/quote/{symbol}').content, 'lxml')
    tag = soup.select_one(f'fin-streamer[data-symbol="{symbol}"]')
    return float(tag['value'])


def token_list(s):
    # Tokenize, POS-tag, chunk, and flatten back to (token, tag, IOB) triples.
    return nltk.chunk.tree2conlltags(
        cp.parse(
            nltk.pos_tag(
                nltk.word_tokenize(s)
            )))


def assify(s):
    tag_list = token_list(s)
    for i, (text, tag, iob) in enumerate(tag_list):
        if text[-3:].lower() == 'ass':
            try:
                next_tag = tag_list[i + 1][1]
                if next_tag in ('NN', 'NNS'):
                    return f'ass-{tag_list[i + 1][0]}'
            except IndexError:
                return


def unblack(s):
    tag_list = token_list(s)
    for i, (text, tag, iob) in enumerate(tag_list):
        if text.lower() == 'black' and (tag.startswith('JJ') or tag.startswith('NN')):
            for text, tag, iob in tag_list[i + 1:]:
                if tag.startswith('NN'):
                    return f'Or as I would say, {text.lower()}'
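
word_tokenize() and pos_tag() above assume the standard NLTK data has already been downloaded. A one-time setup plus a rough usage sketch, run from the same module; the printed results are only the likely outputs and depend on how the tagger labels each token.

# One-time NLTK data needed by word_tokenize() and pos_tag().
import nltk
nltk.download('punkt')
nltk.download('averaged_perceptron_tagger')

print(assify('that is a badass truck'))   # likely: 'ass-truck'
print(unblack('the black cat slept'))     # likely: 'Or as I would say, cat'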

src/kwaylon/kwaylon.py Normal file

@@ -0,0 +1,156 @@
import asyncio
import logging
import re
from datetime import timedelta, datetime
from pathlib import Path
from typing import List

import pandas as pd
from nextcord import Client, Message, TextChannel
from nextcord import RawReactionActionEvent, Emoji
from nextcord import utils

from . import jokes
from .reactions import ReactionData

LIL_STINKY_ID = 704043422276780072
LOGGER = logging.getLogger(__name__)


class Kwaylon(Client):
    # db_path: Path = Path(r'../data/messages.db')

    def __init__(self, limit: int = 5000, days: int = 30, *args, **kwargs):
        super().__init__(*args, **kwargs)
        self.db_path = Path.cwd().parents[0] / 'data' / 'messages.db'
        self.limit, self.days = limit, days
        self.jokes = list(jokes.collect_jokes())
        self.lock = asyncio.Lock()
        self.most_regex = re.compile(r'most\s+(?P<emoji>\S+)')
        self.leaderboard_regex = re.compile(
            r'^most\s*?(?P<emoji>\S+?)\s*?(leaderboard|((?:.+?(?P<days>\d+) days)))',
            re.IGNORECASE
        )
    def text_channels(self) -> List[TextChannel]:
        return [chan for chan in self.get_all_channels() if isinstance(chan, TextChannel)]

    def robotics_facility(self) -> TextChannel:
        for chan in self.text_channels():
            if chan.name == 'robotics-facility' and chan.guild.name == 'Family Dinner':
                return chan

    def kaylon_emoji(self) -> Emoji:
        return utils.get(self.emojis, name='kaylon')

    async def handle_ready(self):
        async def alive():
            channel: TextChannel = self.robotics_facility()
            await channel.send('https://tenor.com/view/terminator-im-back-gif-19144173')
            await channel.send(self.kaylon_emoji())
        # await alive()
        self.data = ReactionData(self.db_path)
        self.data.read_all()
    async def handle_message(self, message: Message):
        if message.author != self.user:
            # "@Kwaylon read ... N days" triggers a rescan of reaction history.
            for mention in message.mentions:
                if mention.id == self.user.id and 'read' in message.content:
                    if (m := re.search(r'(\d+) days', message.content)):
                        days = int(m.group(1))
                    else:
                        days = self.days
                    await self.data.scan_messages(client=self, limit=self.limit, days=days)
                    return

            # "most <emoji> ..." looks up the most-reacted message or builds a leaderboard.
            if (most_match := self.most_regex.match(message.content)):
                emoji_ref = most_match.group('emoji')
                emoji_name = get_emoji_name(emoji_ref)
                LOGGER.info(f'Most {emoji_name}')
                with self.data.connect() as con:
                    df = self.data.read_emoji(emoji_name, con)
                con.close()
                days = get_days(message.content) or 14
                df = filter_days(df, days)
                if df.shape[0] > 0:
                    LOGGER.info(f'{df.shape[0]} messages with {emoji_ref} after filtering')
                    if 'leaderboard' in message.content:
                        LOGGER.info('Building leaderboard')
                        res = f'{emoji_ref} totals, past {days} days\n'
                        if (board := await self.leaderboard(df)) is not None:
                            res += board
                        await message.reply(res)
                    else:
                        most = df.sort_values('count').iloc[-1]
                        msg = await self.fetch_message(most)
                        await message.reply(f'{msg.jump_url}')
                else:
                    await message.reply(f"NObody (in the past {days} days)...gah, leave me alone!")
                LOGGER.info('Done')
                return

            # Otherwise, see if any registered joke wants to respond.
            for joke in self.jokes:
                if (joke_match := joke.scan(message)) is not None:
                    LOGGER.info(f'{joke.__class__.__name__} detected: {message.content}, {joke_match.group()}')
                    await joke.respond(message, self, joke_match)
    async def leaderboard(self, df: pd.DataFrame) -> str:
        df = df.groupby('auth_id').sum()
        counts = df['count'].sort_values(ascending=False)
        counts.index = [(await self.fetch_user(idx)).display_name for idx in counts.index]
        width = max(len(str(s)) for s in counts.index)
        res = '\n'.join(
            f"`{str(name).ljust(width + 1)}with {cnt:<2.0f} total`"
            for name, cnt in counts.iteritems()
        )
        return res

    async def handle_raw_reaction(self, payload: RawReactionActionEvent):
        LOGGER.info(payload)
        guild = await self.fetch_guild(payload.guild_id)
        channel = await guild.fetch_channel(payload.channel_id)
        message = await channel.fetch_message(payload.message_id)
        async with self.lock:
            with self.data.connect() as con:
                self.data.add_reactions_from_message(message, con)
            con.close()

    async def fetch_message(self, row: pd.Series):
        guild = await self.fetch_guild(row['guild_id'])
        channel = await guild.fetch_channel(row['channel_id'])
        return await channel.fetch_message(row['msg_id'])


def get_emoji_name(string: str) -> str:
    # Custom emoji arrive as '<:name:id>'; reduce either form to a bare lowercase name.
    if (m := re.search(r'<:(?P<name>\w+):(?P<id>\d+)>', string)):
        string = m.group('name')
    return string.lower().strip()


day_regex = re.compile(r'(?P<days>\d+) days')


def get_days(input_str):
    if (m := day_regex.search(input_str)):
        return int(m.group('days'))


def filter_days(df: pd.DataFrame, days: int) -> pd.DataFrame:
    start = (datetime.today() - timedelta(days=days)).astimezone()
    valid_dates = df['datetime'] > start
    return df.loc[valid_dates]
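
Kwaylon names its handlers handle_ready / handle_message / handle_raw_reaction rather than nextcord's on_* event names, so something outside this file has to wire them up. A minimal sketch of a hypothetical runner; the file name, intents choice, and token handling are assumptions, not part of this commit.

# Hypothetical main.py; not included in this commit.
import os

import nextcord
from kwaylon import Kwaylon

client = Kwaylon(intents=nextcord.Intents.all())


@client.event
async def on_ready():
    await client.handle_ready()


@client.event
async def on_message(message: nextcord.Message):
    await client.handle_message(message)


@client.event
async def on_raw_reaction_add(payload: nextcord.RawReactionActionEvent):
    await client.handle_raw_reaction(payload)


client.run(os.environ['DISCORD_TOKEN'])  # token source is an assumption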

src/kwaylon/msg.py Normal file

@@ -0,0 +1,57 @@
import logging
from datetime import datetime, timedelta

import pandas as pd
from nextcord import Client, Message, Reaction
from nextcord import TextChannel
from nextcord.utils import AsyncIterator

LOGGER = logging.getLogger(__name__)


async def message_gen(client: Client, limit: int = None, days: int = 90, **kwargs) -> AsyncIterator[Message]:
    # Default to the last `days` of history; strip tzinfo from any caller-supplied datetime.
    if 'after' not in kwargs:
        kwargs['after'] = (datetime.today() - timedelta(days=days))
    elif isinstance((after := kwargs.get('after', None)), datetime):
        kwargs['after'] = after.replace(tzinfo=None)
    kwargs['limit'] = limit
    LOGGER.info(kwargs)

    for channel in client.get_all_channels():
        # Skip uncategorized channels and anything under the Archive category.
        if channel.category is None or channel.category.name == 'Archive':
            continue
        if not isinstance(channel, TextChannel):
            continue
        LOGGER.info(f'Channel: {channel.category}: {channel.name}')
        async for msg in channel.history(**kwargs):
            yield msg
        for thread in channel.threads:
            LOGGER.info(f'Thread: {channel.category}: {channel.name}: {thread.name}')
            async for msg in thread.history(**kwargs):
                yield msg
    LOGGER.info('Done getting messages')


def reaction_dict(reaction: Reaction):
    # Flatten a Reaction into the row format stored in the reactions table.
    return {
        'msg_id': reaction.message.id,
        'emoji': reaction.emoji.name if reaction.is_custom_emoji() else reaction.emoji,
        'emoji_id': reaction.emoji.id if reaction.is_custom_emoji() else None,
        'channel_id': reaction.message.channel.id,
        'guild_id': reaction.message.channel.guild.id,
        'auth_id': reaction.message.author.id,
        'count': int(reaction.count),
        'datetime': reaction.message.created_at.astimezone(),
    }


async def reaction_gen(client: Client, **kwargs) -> AsyncIterator[dict]:
    async for msg in message_gen(client=client, **kwargs):
        for reaction in msg.reactions:
            yield reaction_dict(reaction)


async def reaction_df(client: Client, **kwargs):
    return pd.DataFrame([r async for r in reaction_gen(client=client, **kwargs)])

src/kwaylon/reactions.py Normal file

@@ -0,0 +1,63 @@
import logging
import sqlite3
from dataclasses import dataclass
from pathlib import Path

import pandas as pd
from nextcord import Message, Client

from .msg import reaction_dict, message_gen

LOGGER = logging.getLogger(__name__)


@dataclass
class ReactionData:
    path: Path

    def connect(self, *args, **kwargs) -> sqlite3.Connection:
        return sqlite3.connect(self.path, *args, **kwargs)

    async def scan_messages(self, client: Client, **kwargs):
        # Walk the message history and (re)write reaction rows for anything that has reactions.
        con = self.connect()
        try:
            with con:
                async for msg in message_gen(client=client, **kwargs):
                    if len(msg.reactions) > 0:
                        self.add_reactions_from_message(msg, con)
        except Exception as e:
            LOGGER.exception(e)
        finally:
            con.close()

    def add_reactions_from_message(self, msg: Message, con: sqlite3.Connection = None):
        con = con or self.connect()
        try:
            # Replace any rows previously stored for this message.
            con.execute('DELETE FROM reactions WHERE msg_id = ?', (msg.id,))
            data = [tuple(reaction_dict(reaction).values()) for reaction in msg.reactions]
            query = f'INSERT INTO reactions VALUES({",".join("?" for _ in range(8))})'
            con.executemany(query, data)
        except Exception as e:
            LOGGER.exception(e)
        # else:
        #     LOGGER.info(f'Wrote {len(data)} rows to {self.path.name}')

    def read_emoji(self, emoji: str, con: sqlite3.Connection = None) -> pd.DataFrame:
        return self.read_sql(query=f"SELECT * FROM reactions WHERE emoji LIKE '{emoji}'", con=con)

    def read_all(self, con: sqlite3.Connection = None) -> pd.DataFrame:
        return self.read_sql(query='SELECT * FROM reactions', con=con)

    def read_sql(self, query: str, con: sqlite3.Connection = None):
        close = con is None
        con = con or self.connect()
        res = pd.read_sql(query, con=con, index_col=None)
        LOGGER.info(f'Read {res.shape[0]} reactions')
        if close:
            con.close()
        res['datetime'] = pd.to_datetime(res['datetime'])
        return res.sort_values('count', ascending=False)
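
Nothing in this commit creates the reactions table itself. The eight-column layout implied by reaction_dict() and the eight-placeholder INSERT above would need a one-time setup along these lines; the column types are assumptions.

# Hypothetical one-time setup for data/messages.db; column order mirrors reaction_dict().
import sqlite3

con = sqlite3.connect('data/messages.db')
con.execute("""
    CREATE TABLE IF NOT EXISTS reactions (
        msg_id     INTEGER,
        emoji      TEXT,
        emoji_id   INTEGER,
        channel_id INTEGER,
        guild_id   INTEGER,
        auth_id    INTEGER,
        count      INTEGER,
        datetime   TEXT
    )
""")
con.commit()
con.close()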