import asyncio
import logging
import sqlite3
from datetime import datetime, timedelta
from pathlib import Path
from typing import Optional

import discord
import pandas as pd

from msg import message_df, full_reaction_df, message_dict, LOGGER, reaction_df

# Rebinds the LOGGER name imported from msg above, so records emitted here
# are attributed to this module's logger.
LOGGER = logging.getLogger(__name__)


class MsgData:
    """In-memory store of Discord messages and their reactions.

    Attributes:
        msgs: One row per message, indexed by message id. The methods below
            read the 'created', 'user id', 'display_name' and 'link' columns;
            frames built by ``create`` also carry an 'object' column.
        reactions: Reaction rows indexed by a two-level index whose second
            level is the emoji name (``from_sql`` names the levels
            'msg id' and 'emoji'), with at least a 'count' column.
        lock: Serialises the coroutines that mutate ``msgs``/``reactions``.
    """

    msgs: pd.DataFrame
    reactions: pd.DataFrame
    lock: asyncio.Lock

    @staticmethod
    def _open_connection(db):
        """Return ``(connection, owned)`` for a path or an open connection.

        ``owned`` is True when the connection was opened here and therefore
        must be closed by the caller of this helper.

        Raises:
            TypeError: If ``db`` is neither a str/Path nor a
                ``sqlite3.Connection``.
        """
        if isinstance(db, (str, Path)):
            return sqlite3.connect(db), True
        if isinstance(db, sqlite3.Connection):
            return db, False
        raise TypeError(f'db argument is not a valid type: {type(db)}')

    @classmethod
    async def create(cls, client: discord.Client, **kwargs):
        """Build an instance by fetching messages through ``client``.

        ``kwargs`` are forwarded unchanged to ``message_df``. The messages
        are sorted by their 'created' column and the reactions frame is
        built from the message objects in the 'object' column.
        """
        self = cls()
        self.lock = asyncio.Lock()
        self.msgs = await message_df(client, **kwargs)
        self.msgs = self.msgs.sort_values('created')
        self.reactions = full_reaction_df(self.msgs['object'].tolist())
        return self

    @classmethod
    def from_sql(cls, db, local_tz='US/Central'):
        """Load an instance from the 'msgs' and 'reactions' SQLite tables.

        Args:
            db: Path to a SQLite file (str or Path) or an open
                ``sqlite3.Connection``. A connection passed in is left open;
                one opened here from a path is closed before returning.
            local_tz: Time zone the 'created' column is converted to.

        Raises:
            TypeError: If ``db`` is not a supported type.
        """
        con, owned = cls._open_connection(db)
        try:
            msgs = pd.read_sql('select * from msgs', con=con, index_col='id')
            reactions = pd.read_sql('select * from reactions', con)
        finally:
            if owned:
                con.close()

        self = cls()
        # The mutating coroutines (add_msg / update_reaction) need the lock
        # on SQL-loaded instances too.
        # NOTE(review): on Python < 3.10 an asyncio.Lock binds to the event
        # loop current at construction time - confirm this is called under
        # the loop that later runs the bot.
        self.lock = asyncio.Lock()
        # tz_convert requires tz-aware values; presumably the stored
        # timestamps carry a UTC offset - verify against the writer.
        msgs['created'] = pd.to_datetime(msgs['created']).dt.tz_convert(local_tz)
        self.msgs = msgs
        self.reactions = reactions.set_index(['msg id', 'emoji'])
        return self

    def to_sql(self, db):
        """Write both frames to SQLite, replacing the existing tables.

        The 'object' column (present on frames built by ``create``) is not
        persisted. It is dropped only if present, so an instance loaded with
        ``from_sql`` can be written back.

        Args:
            db: Path to a SQLite file (str or Path) or an open
                ``sqlite3.Connection``. A connection passed in is left open;
                one opened here from a path is closed before returning.

        Raises:
            TypeError: If ``db`` is not a supported type.
        """
        con, owned = self._open_connection(db)
        try:
            self.msgs.drop('object', axis=1, errors='ignore').to_sql(
                name='msgs',
                con=con,
                if_exists='replace',
                index=True,
                index_label=self.msgs.index.name
            )
            # For a MultiIndex ``.name`` is None; pandas then falls back to
            # the index level names for the column labels.
            self.reactions.drop('object', axis=1, errors='ignore').to_sql(
                name='reactions',
                con=con,
                if_exists='replace',
                index=True,
                index_label=self.reactions.index.name
            )
        finally:
            if owned:
                con.close()

    def __str__(self):
        return str(self.msgs) + '\n\n' + str(self.reactions)

    def __repr__(self):
        return f'<{__name__}.{self.__class__.__name__} with {self.msgs.shape[0]} messages and {self.reactions.shape[0]} reactions>'

    def __getitem__(self, item):
        """Look up by emoji name (str) or by message id (int).

        A str returns ``emoji_messages(item)`` sorted by 'count' descending.
        An int returns the reaction rows of that message id, with missing
        values replaced by 0 and every cell converted with ``int``.

        Raises:
            TypeError: If ``item`` is neither a str nor an int.
        """
        if isinstance(item, str):
            return self.emoji_messages(emoji_name=item).sort_values('count', ascending=False)
        if isinstance(item, int):
            rows = self.reactions.loc[pd.IndexSlice[item, :],].fillna(0)
            # Column-wise Series.map is the element-wise equivalent of the
            # deprecated DataFrame.applymap and exists in every pandas version.
            return rows.apply(lambda col: col.map(int))
        raise TypeError(f'unsupported key type: {type(item)}')

    async def add_msg(self, message: discord.Message):
        """Insert ``message`` into ``msgs`` under its id, holding the lock."""
        async with self.lock:
            mdict = message_dict(message)
            # The id becomes the row label, so it must not also be a column.
            mdict.pop('id')
            self.msgs.loc[message.id] = pd.Series(mdict)
        LOGGER.info(f'Added message id {message.id} from {message.author}: {message.content}')

    async def update_reaction(self, msg: discord.Message):
        """Replace the stored reaction rows of ``msg`` with its current ones.

        The old rows are dropped and the new ones appended under a single
        lock acquisition, so other coroutines never observe the message with
        its reactions half-replaced.

        Returns:
            The frame produced by ``reaction_df(msg)``, or None when the
            message currently has no reactions.
        """
        new = reaction_df(msg) if len(msg.reactions) > 0 else None

        async with self.lock:
            try:
                # Drop all the reactions for this message id, if there are any
                self.reactions.drop(msg.id, level=0, axis=0, inplace=True)
            except KeyError:
                # Nothing was recorded for this message yet.
                pass
            if new is not None:
                self.reactions = pd.concat([self.reactions, new])

        if new is None:
            return None

        LOGGER.info(str(new.droplevel(level=0, axis=0).loc[:, 'count']))
        # add_msg takes the lock itself and asyncio.Lock is not reentrant,
        # so it must be awaited only after the lock above has been released.
        if msg.id not in self.msgs.index:
            await self.add_msg(msg)
        return new

    def emoji_messages(self, emoji_name: str, days: Optional[int] = None) -> pd.DataFrame:
        """Create a DataFrame of the messages that have reactions with a certain emoji.

        Includes a 'count' column and is sorted by 'created', newest first.

        Args:
            emoji_name: Emoji to look up in the second index level of
                ``reactions``.
            days: When given and positive, keep only messages created within
                the last ``days`` days.

        Raises:
            KeyError: If the emoji is unknown, or none of the messages
                carrying it are present in ``msgs``.
        """
        counts: pd.Series = self.emoji_counts(emoji_name)
        # Get the ids of messages that have the targeted emoji
        message_ids: pd.Index = counts.index.drop_duplicates()
        # A reaction row may reference a message that was never captured in
        # self.msgs; keep only the ids that are actually there.
        message_ids = message_ids[message_ids.isin(self.msgs.index.get_level_values(0))]

        if message_ids.shape[0] == 0:
            raise KeyError(f'No messages found with {emoji_name} reactions')

        # Explicit copy so the column assignment below never touches self.msgs.
        res: pd.DataFrame = self.msgs.loc[message_ids].copy()
        res['count'] = counts
        if days is not None and days > 0:
            cutoff = (datetime.today() - timedelta(days=days)).astimezone()
            res = res[res['created'] >= cutoff]
        return res.sort_values('created', ascending=False)

    def emoji_counts(self, emoji_name: str) -> pd.Series:
        """Create a Series indexed by message id with the number of ``emoji_name`` reactions as values.

        The result is sorted by count, highest first.

        Raises:
            TypeError: If ``emoji_name`` is not a str.
            KeyError: If no reaction row exists for ``emoji_name`` (logged
                before being re-raised).
        """
        if not isinstance(emoji_name, str):
            raise TypeError('emoji_name must be a string')
        try:
            return self.reactions.loc[pd.IndexSlice[:, emoji_name], 'count'].droplevel(1).sort_values(ascending=False)
        except KeyError:
            LOGGER.error(f' {emoji_name} not found out of {self.unique_emojis.shape[0]} unique emojis')
            LOGGER.error(f'{self.reactions.index.get_level_values(1)}')
            raise

    @property
    def unique_emojis(self) -> pd.Index:
        """Distinct values of the second index level of ``reactions``."""
        return self.reactions.index.get_level_values(1).drop_duplicates()

    def emoji_totals(self, emoji_name: str, days: Optional[int] = None) -> pd.Series:
        """Create a Series indexed by user id with the total number of ``emoji_name`` reactions as values.

        Sorted by total, highest first. ``days`` is passed through to
        ``emoji_messages``.
        """
        return (self
                .emoji_messages(emoji_name, days)
                .groupby('user id')
                .apply(lambda gdf: gdf['count'].sum())
                .sort_values(ascending=False))

    async def emoji_user_counts(self, client: discord.Client, emoji_name: str, days: int):
        """Return ``emoji_totals`` re-indexed by each user's display name.

        Users are fetched one at a time through ``client.fetch_user``.
        """
        counts: pd.Series = self.emoji_totals(emoji_name, days)
        # The id is passed positionally: discord.py 2.x made the parameter
        # positional-only, and 1.x accepts it positionally as well.
        counts.index = pd.Index([(await client.fetch_user(uid)).display_name for uid in counts.index])
        return counts

    def worst_offsenses(self, user: str, days: int):
        """Format the five most-'cancelled' messages of ``user`` in the last ``days`` days.

        ``user`` is matched as a case-insensitive literal substring of the
        'display_name' column. Returns a ready-to-send string: a header
        followed by one count/link pair per message, or a "no cancellations"
        notice when nothing matches.
        """
        cdf = self.emoji_messages('cancelled', days=days)
        # regex=False: the name comes from a user and may contain regex
        # metacharacters; na=False: rows without a display name never match.
        mask = cdf['display_name'].str.contains(user, case=False, regex=False, na=False)
        # emoji_messages returns rows newest-first; "top 5" must rank by
        # count. A stable sort keeps newest-first order among equal counts.
        cdf = cdf[mask].sort_values('count', ascending=False, kind='mergesort')
        if cdf.shape[0] > 0:
            res = f'{user}\'s top 5 cancellations in the last {days} days:\n'
            res += f'\n'.join(
                f'`{row["count"]:<2.0f}cancellations`\n{row["link"]}' for idx, row in cdf.iloc[:5].iterrows())
        else:
            res = f'No cancellations for {user} in the past {days} days'
        return res

    async def biggest_single(self, client: discord.Client, emoji: str, days: int) -> str:
        """Describe the user with the highest ``emoji`` total over the last ``days`` days.

        Returns a string containing the user's mention, the total and the
        number of days.
        """
        data: pd.Series = self.emoji_totals(emoji_name=emoji, days=days)
        # Positional for compatibility with discord.py 2.x (positional-only).
        user: discord.User = await client.fetch_user(data.index[0])
        LOGGER.info(f'User: {user.mention}')
        return f'{user.mention} with {data.iloc[0]:.0f} over the past {int(days)} days'