big rework of emoji calculations, much simpler and more efficient now

2021-08-13 19:31:22 -05:00
parent 8ef4de693a
commit 108a51b9fe
3 changed files with 92 additions and 91 deletions
@@ -1,14 +1,14 @@
 import asyncio
 import logging
 import sqlite3
 from datetime import datetime, timedelta
 from pathlib import Path
 import discord
 import pandas as pd
 from discord import RawReactionActionEvent
-from msg import message_df, reaction_df, message_dict, LOGGER, convert_emoji, reaction_series, emoji_messages, \
+from msg import message_df, reaction_df, message_dict, LOGGER, convert_emoji, reaction_series
    emoji_totals
 LOGGER = logging.getLogger(__name__)
@@ -21,14 +21,15 @@ class MsgData:
    @classmethod
    async def create(cls, client: discord.Client, **kwargs):
        self = MsgData()
        self.msgs: pd.DataFrame = await message_df(client, **kwargs)
        self.msgs = self.msgs.sort_values('created')
        self.reactions: pd.DataFrame = await reaction_df(self.msgs['object'].tolist())
        self.lock = asyncio.Lock()
-        return self
+        async with self.lock:
            self.msgs: pd.DataFrame = await message_df(client, **kwargs)
            self.msgs = self.msgs.sort_values('created')
            self.reactions: pd.DataFrame = await reaction_df(self.msgs['object'].tolist())
            return self
    @classmethod
-    def from_sql(cls, db):
+    def from_sql(cls, db, local_tz='US/Central'):
        if isinstance(db, (str, Path)):
            con = sqlite3.connect(db)
        elif isinstance(db, sqlite3.Connection):
@@ -36,7 +37,7 @@ class MsgData:
        self = MsgData()
        self.msgs: pd.DataFrame = pd.read_sql('select * from msgs', con=con, index_col='id')
-        self.msgs['created'] = self.msgs['created'].apply(pd.to_datetime, utc=True)
+        self.msgs['created'] = pd.to_datetime(self.msgs['created']).dt.tz_convert(local_tz)
        self.reactions: pd.DataFrame = pd.read_sql('select * from reactions', con).set_index(['msg id', 'emoji'])
        return self
@@ -45,6 +46,8 @@ class MsgData:
            con = sqlite3.connect(db)
        elif isinstance(db, sqlite3.Connection):
            con = db
        else:
            raise TypeError(f'db argument is not a valid type: {type(db)}')
        self.msgs.drop('object', axis=1).to_sql(
            name='msgs',
@@ -69,7 +72,7 @@ class MsgData:
    def __getitem__(self, item):
        if isinstance(item, str):
-            return self.reactions.loc[pd.IndexSlice[:, item],].fillna(0).applymap(int)
+            return self.emoji_messages(emoji_name=item).sort_values('count', ascending=False)
        elif isinstance(item, int):
            return self.reactions.loc[pd.IndexSlice[item, :],].fillna(0).applymap(int)
@@ -96,29 +99,60 @@ class MsgData:
                self.reactions = pd.concat([self.reactions, new])
                LOGGER.info(f'\n{str(new)}')
-    def emoji_messages(self, emoji_name: str, days: int):
+    def emoji_messages(self, emoji_name: str, days: int = None) -> pd.DataFrame:
-        res = emoji_messages(msg_df=self.msgs, react_df=self.reactions, emoji_name=emoji_name, days=days)
+        """Creates a DataFrame of the messages that have reactions with a certain emoji. Includes a 'count' column"""
-        if res is None:
+        counts = self.emoji_counts(emoji_name)
-            raise KeyError(f'No emojis found for {emoji_name}')
+
        # get the ids of the messages that have the targeted emoji
        count_ids = counts.index.drop_duplicates()
        # filter by the messages that have actually been captured in the self.msgs DataFrame
        count_ids = count_ids[count_ids.isin(self.msgs.index.get_level_values(0))]
        if count_ids.shape[0] > 0:
            res = self.msgs.loc[count_ids]
            res['count'] = counts
            if days is not None:
                res = res[res['created'] >= (datetime.today() - timedelta(days=days)).astimezone()]
            return res.sort_values('created', ascending=False)
        else:
-            return res
+            raise KeyError(f'No messages found with {emoji_name} reactions')
-    def emoji_totals(self, emoji_name: str, days: int):
+    def emoji_counts(self, emoji_name: str) -> pd.Series:
-        return emoji_totals(edf=self.emoji_messages(emoji_name, days))
+        assert isinstance(emoji_name, str), f'emoji_name must be a string'
        try:
            return self.reactions.loc[pd.IndexSlice[:, emoji_name],'count'].droplevel(1).sort_values(ascending=False)
        except KeyError as e:
            LOGGER.error(
                f' {emoji_name} not found out of {self.unique_emojis.shape[0]} unique emojis')
            raise
-    def emoji_leaderboard(self, emoji_name: str, days: int):
+    @property
-        df = self.emoji_totals(emoji_name, days)
+    def unique_emojis(self) -> pd.Index:
-        width = max(list(map(lambda s: len(str(s)), df.index.values)))
+        return self.reactions.index.get_level_values(1).drop_duplicates()
    def emoji_totals(self, emoji_name: str, days: int = None) -> pd.Series:
        """Sum the reaction counts of one emoji per message author.

        Args:
            emoji_name: Name of the emoji whose reactions are tallied.
            days: Passed straight through to ``emoji_messages``.

        Returns:
            A Series indexed by ``'user id'`` holding the summed ``'count'``
            of that user's matching messages, sorted largest first.

        Raises:
            Whatever ``emoji_messages`` raises for this emoji.
        """
        reacted = self.emoji_messages(emoji_name, days)
        # Select the column *before* aggregating: ``groupby(...).apply(...)``
        # hands back a DataFrame when there are no groups, and the
        # ``sort_values(ascending=False)`` below would then fail for lack of a
        # ``by`` argument. ``['count'].sum()`` always yields a Series and
        # avoids a Python-level call per group.
        totals = reacted.groupby('user id')['count'].sum()
        return totals.sort_values(ascending=False)
    async def emoji_leaderboard(self, client: discord.Client, emoji_name: str, days: int):
        counts: pd.Series = self.emoji_totals(emoji_name, days)
        counts.index = pd.Index([(await client.fetch_user(user_id=uid)).display_name for uid in counts.index])
        width = max(list(map(lambda s: len(str(s)), counts.index.values)))
        res = f'{emoji_name} totals, past {days} days\n'
        res += '\n'.join(
-            f"`{str(name).ljust(width + 1)}with {row['total']:<2.0f} total`"
+            f"`{str(name).ljust(width + 1)}with {cnt:<2.0f} total`"
-            for name, row in df.iterrows()
+            for name, cnt in counts.iteritems()
        )
        return res
    def cancellation_leaderboard(self, days, client: 'discord.Client' = None):
        """Build the leaderboard for the ``'cancelled'`` emoji.

        Thin wrapper around ``emoji_leaderboard``. That method takes a
        ``client`` argument, so the client has to be forwarded; calling it
        without one can never succeed.

        Args:
            days: Look-back window handed to ``emoji_leaderboard``.
            client: Discord client forwarded to ``emoji_leaderboard``.

        Returns:
            Whatever ``emoji_leaderboard`` returns. It is declared ``async``,
            so the caller is expected to await the result.

        Raises:
            TypeError: If no ``client`` is supplied.
        """
        if client is None:
            # Fail here with a clear message rather than deep inside the
            # leaderboard code.
            raise TypeError('cancellation_leaderboard() requires a discord client')
        return self.emoji_leaderboard(client=client, emoji_name='cancelled', days=days)
    def worst_offsenses(self, user: str, days: int):
        cdf = self.emoji_messages('cancelled', days=days)
        cdf = cdf[cdf['display_name'].str.contains(user, case=False)]
@@ -133,10 +167,7 @@ class MsgData:
        return res
    async def biggest_single(self, client: discord.Client, emoji: str, days: int) -> str:
-        data = self.emoji_totals(emoji_name=emoji, days=days)
+        data: pd.Series = self.emoji_totals(emoji_name=emoji, days=days)
-        username = data.index[0]
+        user: discord.User = await client.fetch_user(user_id=data.index[0])
        reacted_msgs = self.emoji_messages(emoji_name=emoji, days=days)
        d = reacted_msgs.set_index('display_name')['user id'].drop_duplicates().to_dict()
        user: discord.User = await client.fetch_user(user_id=d[username])
        LOGGER.info(f'User: {user.mention}')
-        return f'{user.mention} with {data.iloc[0]["total"]:.0f} over the past {int(days)} days'
+        return f'{user.mention} with {data.iloc[0]:.0f} over the past {int(days)} days'
@@ -5,7 +5,6 @@ from typing import Dict, Iterable
 import discord
 import pandas as pd
 import pandas.errors
 from dotenv import load_dotenv
 LOGGER = logging.getLogger(__name__)
@@ -49,7 +48,7 @@ def message_dict(m: discord.Message) -> Dict:
        'user id': m.author.id,
        'message': m.content,
        'channel': m.channel.name,
-        'channel link': f'<#{m.channel.id}>',
+        'channel link': m.channel.mention,
        'link': m.jump_url,
    }
@@ -78,38 +77,6 @@ async def reaction_dict(r: discord.Reaction) -> Dict:
    }
 def emoji_messages(msg_df, react_df, emoji_name: str, days: int = 10) -> pd.DataFrame:
    """Collect the recent messages that carry a given emoji reaction.

    Looks ``emoji_name`` up in the second index level of ``react_df``, pulls
    the matching rows out of ``msg_df`` by the first index level, attaches
    each message's reaction ``'count'`` and keeps only messages whose
    ``'created'`` value falls within the last ``days`` days.

    Returns the filtered frame sorted by ``'count'`` (largest first), or
    ``None`` (after logging an error) when the emoji is unknown or no
    messages match. Re-raises ``pandas.errors.InvalidIndexError`` from the
    count lookup after logging it.
    """
    known_emojis = react_df.index.get_level_values(1).drop_duplicates().values
    if emoji_name not in known_emojis:
        LOGGER.error(f'Emoji not found in reactions DataFrame: {emoji_name}')
        return None

    reactions = react_df.loc[pd.IndexSlice[:, emoji_name], :]
    msg_ids = reactions.index.get_level_values(0).to_list()
    selected = msg_df.loc[msg_ids]
    # keep one row per message id, ordered by id
    reacted_msgs = selected[~selected.index.duplicated()].sort_index()

    if len(reacted_msgs) == 0:
        LOGGER.error(f'No messages found with {emoji_name} reactions')
        return None

    LOGGER.info(
        f'Found {reacted_msgs.shape[0]} messages for the leaderboard, '
        f'{reactions["count"].sum():.0f} reactions total'
    )

    def count_for(idx):
        # reaction count recorded for this message id and the target emoji
        return reactions.loc[pd.IndexSlice[idx, emoji_name], 'count']

    try:
        reacted_msgs['count'] = reacted_msgs.index.to_series().apply(count_for)
    except pandas.errors.InvalidIndexError as e:
        LOGGER.error(f'{e}\n{reacted_msgs[reacted_msgs.index.duplicated()]}')
        raise

    cutoff = (datetime.today() - timedelta(days=days)).astimezone()
    recent = reacted_msgs[reacted_msgs['created'] >= cutoff]
    return recent.sort_values('count', ascending=False)
 def emoji_totals(edf: pd.DataFrame) -> pd.DataFrame:
    totals = edf.groupby('display_name').sum()['count'].sort_values(ascending=False).apply(int)
    max_channels = (
@@ -3,7 +3,6 @@ import logging
 import os
 import re
 from pathlib import Path
 from threading import Lock
 import discord
 from dotenv import load_dotenv
@@ -26,7 +25,6 @@ class RoboPage(discord.Client):
        attrs = filter(lambda n: n.endswith('Joke') and not n.startswith('Joke'), dir(jokes))
        attrs = map(lambda n: getattr(jokes, n)(), attrs)
        self.jokes = list(attrs)
        self.lock = Lock()
        self.most_regex = re.compile("^who is the most (?P<emoji>\w+)(?: in the past (?P<days>\d+) days)?\??$",
                                     re.IGNORECASE)
        self.leaderboard_regex = re.compile('^most (?P<emoji>\w+) leaderboard$', re.IGNORECASE)
@@ -56,35 +54,40 @@ class RoboPage(discord.Client):
        if hasattr(self, 'data'):
            await self.data.add_msg(message)
-        if message.author != self.user:
+            async with self.data.lock:
-            if (m := self.leaderboard_regex.match(message.content)) is not None:
+                if message.author != self.user:
-                try:
+                    if (m := self.leaderboard_regex.match(message.content)) is not None:
-                    await message.reply(self.data.emoji_leaderboard(emoji_name=m.group('emoji').lower(), days=14))
+                        try:
-                except KeyError as e:
+                            await message.reply(await self.data.emoji_leaderboard(
-                    LOGGER.exception(e)
+                                client=self,
-                    await message.reply(f"I couldn't find any {m.group('emoji')} reactions. Leave me alone!")
+                                emoji_name=m.group('emoji').lower(),
                                days=14
                            ))
                        except KeyError as e:
                            LOGGER.exception(e)
                            await message.reply(f"I couldn't find any {m.group('emoji')} reactions. Leave me alone!")
-            elif (m := self.most_regex.match(message.content)) is not None:
+                    elif (m := self.most_regex.match(message.content)) is not None:
-                days = m.group('days') or 14
+                        days = m.group('days') or 14
-                try:
+                        try:
-                    await message.reply(
+                            await message.reply(
-                        await self.data.biggest_single(client=self, emoji=m.group('emoji').lower(), days=int(days))
+                                await self.data.biggest_single(client=self, emoji=m.group('emoji').lower(), days=int(days))
-                    )
+                            )
-                except IndexError as e:
+                        except IndexError as e:
-                    await message.reply('NObody')
+                            await message.reply('NObody')
-            elif 'not like this' in message.content.lower():
+                    elif 'not like this' in message.content.lower():
-                await message.reply(self.gifs['not like this'])
+                        await message.reply(self.gifs['not like this'])
-            elif 'beans' in message.content.lower():
+                    elif 'beans' in message.content.lower():
-                await message.reply('Somebody help! I\'ve got beans in my motherboard!\n')
+                        await message.reply('Somebody help! I\'ve got beans in my motherboard!\n')
-                await message.channel.send(self.gifs['beans'])
+                        await message.channel.send(self.gifs['beans'])
-            else:
+                    else:
-                for joke in self.jokes:
+                        for joke in self.jokes:
-                    if (scan_res := joke.scan(message)):
+                            if (scan_res := joke.scan(message)):
-                        print(f'{joke.__class__.__name__} detected:\n{message.content}\n{scan_res}')
+                                print(f'{joke.__class__.__name__} detected:\n{message.content}\n{scan_res}')
-                        await joke.respond(message, self, scan_res)
+                                await joke.respond(message, self, scan_res)
 if __name__ == '__main__':