From 108a51b9febed572f2d04c4a4462778b11ff47c5 Mon Sep 17 00:00:00 2001 From: jsl12 Date: Fri, 13 Aug 2021 19:31:22 -0500 Subject: [PATCH] big rework of emoji calculations, much simpler and more efficient now --- data.py | 91 +++++++++++++++++++++++++++++++++++------------------ msg.py | 35 +-------------------- robopage.py | 57 +++++++++++++++++---------------- 3 files changed, 92 insertions(+), 91 deletions(-) diff --git a/data.py b/data.py index 27b80e0..738f6f5 100644 --- a/data.py +++ b/data.py @@ -1,14 +1,14 @@ import asyncio import logging import sqlite3 +from datetime import datetime, timedelta from pathlib import Path import discord import pandas as pd from discord import RawReactionActionEvent -from msg import message_df, reaction_df, message_dict, LOGGER, convert_emoji, reaction_series, emoji_messages, \ - emoji_totals +from msg import message_df, reaction_df, message_dict, LOGGER, convert_emoji, reaction_series LOGGER = logging.getLogger(__name__) @@ -21,14 +21,15 @@ class MsgData: @classmethod async def create(cls, client: discord.Client, **kwargs): self = MsgData() - self.msgs: pd.DataFrame = await message_df(client, **kwargs) - self.msgs = self.msgs.sort_values('created') - self.reactions: pd.DataFrame = await reaction_df(self.msgs['object'].tolist()) self.lock = asyncio.Lock() - return self + async with self.lock: + self.msgs: pd.DataFrame = await message_df(client, **kwargs) + self.msgs = self.msgs.sort_values('created') + self.reactions: pd.DataFrame = await reaction_df(self.msgs['object'].tolist()) + return self @classmethod - def from_sql(cls, db): + def from_sql(cls, db, local_tz='US/Central'): if isinstance(db, (str, Path)): con = sqlite3.connect(db) elif isinstance(db, sqlite3.Connection): @@ -36,7 +37,7 @@ class MsgData: self = MsgData() self.msgs: pd.DataFrame = pd.read_sql('select * from msgs', con=con, index_col='id') - self.msgs['created'] = self.msgs['created'].apply(pd.to_datetime, utc=True) + self.msgs['created'] = 
pd.to_datetime(self.msgs['created']).dt.tz_convert(local_tz) self.reactions: pd.DataFrame = pd.read_sql('select * from reactions', con).set_index(['msg id', 'emoji']) return self @@ -45,6 +46,8 @@ class MsgData: con = sqlite3.connect(db) elif isinstance(db, sqlite3.Connection): con = db + else: + raise TypeError(f'db argument is not a valid type: {type(db)}') self.msgs.drop('object', axis=1).to_sql( name='msgs', @@ -69,7 +72,7 @@ class MsgData: def __getitem__(self, item): if isinstance(item, str): - return self.reactions.loc[pd.IndexSlice[:, item],].fillna(0).applymap(int) + return self.emoji_messages(emoji_name=item).sort_values('count', ascending=False) elif isinstance(item, int): return self.reactions.loc[pd.IndexSlice[item, :],].fillna(0).applymap(int) @@ -96,29 +99,60 @@ class MsgData: self.reactions = pd.concat([self.reactions, new]) LOGGER.info(f'\n{str(new)}') - def emoji_messages(self, emoji_name: str, days: int): - res = emoji_messages(msg_df=self.msgs, react_df=self.reactions, emoji_name=emoji_name, days=days) - if res is None: - raise KeyError(f'No emojis found for {emoji_name}') + def emoji_messages(self, emoji_name: str, days: int = None) -> pd.DataFrame: + """Creates a DataFrame of the messages that have reactions with a certain emoji. 
Includes a 'count' column""" + counts = self.emoji_counts(emoji_name) + + # get the ids of messages that that have the targeted emoji + count_ids = counts.index.drop_duplicates() + + # filter by the messages that have actually been captured in the self.msgs DataFrame + count_ids = count_ids[count_ids.isin(self.msgs.index.get_level_values(0))] + + if count_ids.shape[0] > 0: + res = self.msgs.loc[count_ids] + + res['count'] = counts + + if days is not None: + res = res[res['created'] >= (datetime.today() - timedelta(days=days)).astimezone()] + + return res.sort_values('created', ascending=False) else: - return res + raise KeyError(f'No messages found with {emoji_name} reactions') - def emoji_totals(self, emoji_name: str, days: int): - return emoji_totals(edf=self.emoji_messages(emoji_name, days)) + def emoji_counts(self, emoji_name: str) -> pd.Series: + assert isinstance(emoji_name, str), f'emoji_name must be a string' + try: + return self.reactions.loc[pd.IndexSlice[:, emoji_name],'count'].droplevel(1).sort_values(ascending=False) + except KeyError as e: + LOGGER.error( + f' {emoji_name} not found out of {self.unique_emojis.shape[0]} unique emojis') + raise - def emoji_leaderboard(self, emoji_name: str, days: int): - df = self.emoji_totals(emoji_name, days) - width = max(list(map(lambda s: len(str(s)), df.index.values))) + @property + def unique_emojis(self) -> pd.Index: + return self.reactions.index.get_level_values(1).drop_duplicates() + + def emoji_totals(self, emoji_name: str, days: int = None) -> pd.Series: + """Creates a Series of the counts for each user id""" + return (self + .emoji_messages(emoji_name, days) + .groupby('user id') + .apply(lambda gdf: gdf['count'].sum()) + .sort_values(ascending=False)) + + async def emoji_leaderboard(self, client: discord.Client, emoji_name: str, days: int): + counts: pd.Series = self.emoji_totals(emoji_name, days) + counts.index = pd.Index([(await client.fetch_user(user_id=uid)).display_name for uid in counts.index]) + 
width = max(list(map(lambda s: len(str(s)), counts.index.values))) res = f'{emoji_name} totals, past {days} days\n' res += '\n'.join( - f"`{str(name).ljust(width + 1)}with {row['total']:<2.0f} total`" - for name, row in df.iterrows() + f"`{str(name).ljust(width + 1)}with {cnt:<2.0f} total`" + for name, cnt in counts.iteritems() ) return res - def cancellation_leaderboard(self, days): - return self.emoji_leaderboard(emoji_name='cancelled', days=days) - def worst_offsenses(self, user: str, days: int): cdf = self.emoji_messages('cancelled', days=days) cdf = cdf[cdf['display_name'].str.contains(user, case=False)] @@ -133,10 +167,7 @@ class MsgData: return res async def biggest_single(self, client: discord.Client, emoji: str, days: int) -> str: - data = self.emoji_totals(emoji_name=emoji, days=days) - username = data.index[0] - reacted_msgs = self.emoji_messages(emoji_name=emoji, days=days) - d = reacted_msgs.set_index('display_name')['user id'].drop_duplicates().to_dict() - user: discord.User = await client.fetch_user(user_id=d[username]) + data: pd.Series = self.emoji_totals(emoji_name=emoji, days=days) + user: discord.User = await client.fetch_user(user_id=data.index[0]) LOGGER.info(f'User: {user.mention}') - return f'{user.mention} with {data.iloc[0]["total"]:.0f} over the past {int(days)} days' + return f'{user.mention} with {data.iloc[0]:.0f} over the past {int(days)} days' diff --git a/msg.py b/msg.py index 15920ef..7de40f0 100644 --- a/msg.py +++ b/msg.py @@ -5,7 +5,6 @@ from typing import Dict, Iterable import discord import pandas as pd -import pandas.errors from dotenv import load_dotenv LOGGER = logging.getLogger(__name__) @@ -49,7 +48,7 @@ def message_dict(m: discord.Message) -> Dict: 'user id': m.author.id, 'message': m.content, 'channel': m.channel.name, - 'channel link': f'<#{m.channel.id}>', + 'channel link': m.channel.mention, 'link': m.jump_url, } @@ -78,38 +77,6 @@ async def reaction_dict(r: discord.Reaction) -> Dict: } -def emoji_messages(msg_df, 
react_df, emoji_name: str, days: int = 10) -> pd.DataFrame: - cached_emojis = react_df.index.get_level_values(1).drop_duplicates().values - - if emoji_name in cached_emojis: - reactions = react_df.loc[pd.IndexSlice[:, emoji_name], :] - reacted_msgs = msg_df.loc[reactions.index.get_level_values(0).to_list()] - reacted_msgs = reacted_msgs[~reacted_msgs.index.duplicated()].sort_index() - if reacted_msgs.shape[0] == 0: - LOGGER.error(f'No messages found with {emoji_name} reactions') - else: - LOGGER.info( - f'Found {reacted_msgs.shape[0]} messages for the leaderboard, ' + \ - f'{reactions["count"].sum():.0f} reactions total' - ) - - try: - reacted_msgs['count'] = reacted_msgs.index.to_series().apply( - lambda idx: reactions.loc[pd.IndexSlice[idx, emoji_name], 'count']) - except pandas.errors.InvalidIndexError as e: - LOGGER.error(f'{e}\n{reacted_msgs[reacted_msgs.index.duplicated()]}') - raise - else: - reacted_msgs = reacted_msgs[ - reacted_msgs['created'] >= (datetime.today() - timedelta(days=days)).astimezone()] - - reacted_msgs = reacted_msgs.sort_values('count', ascending=False) - - return reacted_msgs - else: - LOGGER.error(f'Emoji not found in reactions DataFrame: {emoji_name}') - - def emoji_totals(edf: pd.DataFrame) -> pd.DataFrame: totals = edf.groupby('display_name').sum()['count'].sort_values(ascending=False).apply(int) max_channels = ( diff --git a/robopage.py b/robopage.py index 6676310..81d1305 100644 --- a/robopage.py +++ b/robopage.py @@ -3,7 +3,6 @@ import logging import os import re from pathlib import Path -from threading import Lock import discord from dotenv import load_dotenv @@ -26,7 +25,6 @@ class RoboPage(discord.Client): attrs = filter(lambda n: n.endswith('Joke') and not n.startswith('Joke'), dir(jokes)) attrs = map(lambda n: getattr(jokes, n)(), attrs) self.jokes = list(attrs) - self.lock = Lock() self.most_regex = re.compile("^who is the most (?P<emoji>\w+)(?: in the past (?P<days>\d+) days)?\??$", re.IGNORECASE) self.leaderboard_regex = 
re.compile('^most (?P<emoji>\w+) leaderboard$', re.IGNORECASE) @@ -56,35 +54,40 @@ class RoboPage(discord.Client): if hasattr(self, 'data'): await self.data.add_msg(message) - if message.author != self.user: - if (m := self.leaderboard_regex.match(message.content)) is not None: - try: - await message.reply(self.data.emoji_leaderboard(emoji_name=m.group('emoji').lower(), days=14)) - except KeyError as e: - LOGGER.exception(e) - await message.reply(f"I couldn't find any {m.group('emoji')} reactions. Leave me alone!") + async with self.data.lock: + if message.author != self.user: + if (m := self.leaderboard_regex.match(message.content)) is not None: + try: + await message.reply(await self.data.emoji_leaderboard( + client=self, + emoji_name=m.group('emoji').lower(), + days=14 + )) + except KeyError as e: + LOGGER.exception(e) + await message.reply(f"I couldn't find any {m.group('emoji')} reactions. Leave me alone!") - elif (m := self.most_regex.match(message.content)) is not None: - days = m.group('days') or 14 - try: - await message.reply( - await self.data.biggest_single(client=self, emoji=m.group('emoji').lower(), days=int(days)) - ) - except IndexError as e: - await message.reply('NObody') + elif (m := self.most_regex.match(message.content)) is not None: + days = m.group('days') or 14 + try: + await message.reply( + await self.data.biggest_single(client=self, emoji=m.group('emoji').lower(), days=int(days)) + ) + except IndexError as e: + await message.reply('NObody') - elif 'not like this' in message.content.lower(): - await message.reply(self.gifs['not like this']) + elif 'not like this' in message.content.lower(): + await message.reply(self.gifs['not like this']) - elif 'beans' in message.content.lower(): - await message.reply('Somebody help! I\'ve got beans in my motherboard!\n') - await message.channel.send(self.gifs['beans']) + elif 'beans' in message.content.lower(): + await message.reply('Somebody help! 
I\'ve got beans in my motherboard!\n') + await message.channel.send(self.gifs['beans']) - else: - for joke in self.jokes: - if (scan_res := joke.scan(message)): - print(f'{joke.__class__.__name__} detected:\n{message.content}\n{scan_res}') - await joke.respond(message, self, scan_res) + else: + for joke in self.jokes: + if (scan_res := joke.scan(message)): + print(f'{joke.__class__.__name__} detected:\n{message.content}\n{scan_res}') + await joke.respond(message, self, scan_res) if __name__ == '__main__':