"""Cache Discord messages and reactions in pandas and build emoji leaderboards."""
import asyncio
import logging
import os
import sqlite3
from contextlib import contextmanager
from datetime import datetime, timedelta, timezone
from pathlib import Path
from typing import Dict, Iterable, Iterator, Optional

import discord
import pandas as pd
from discord.raw_models import RawReactionActionEvent
from dotenv import load_dotenv

LOGGER = logging.getLogger(__name__)

# Column order produced by ``message_dict``; keep the two in sync.
_MESSAGE_COLUMNS = (
    'object', 'id', 'created', 'display_name', 'user id',
    'message', 'channel', 'channel link', 'link',
)
# Keys produced by ``reaction_dict``; the first two form the reactions index.
_REACTION_INDEX = ['msg id', 'emoji']
_REACTION_COLUMNS = ['msg id', 'emoji', 'emoji id', 'count']


@contextmanager
def _connection(db) -> Iterator[sqlite3.Connection]:
    """Yield a sqlite3 connection for *db*.

    A ``str``/``Path`` is opened here and is committed and closed on exit.
    An existing ``sqlite3.Connection`` is yielded untouched and left open,
    because the caller owns it.

    Raises:
        TypeError: if *db* is neither a path nor a connection.
    """
    if isinstance(db, sqlite3.Connection):
        yield db
        return
    if not isinstance(db, (str, Path)):
        raise TypeError(
            f'db must be a path or sqlite3.Connection, got {type(db).__name__}'
        )
    con = sqlite3.connect(db)
    try:
        yield con
        con.commit()
    finally:
        con.close()


def _empty_reactions() -> pd.DataFrame:
    """Return an empty reactions frame indexed by ('msg id', 'emoji')."""
    return pd.DataFrame(columns=_REACTION_COLUMNS).set_index(_REACTION_INDEX)


def _category_name(channel) -> str:
    """Return the channel's category name, or '' when it has no category."""
    category = channel.category
    return category.name if category is not None else ''


def _format_totals(df: pd.DataFrame, heading: str) -> str:
    """Render a totals frame (index = name, column 'total') as a text board.

    *heading* is the first line; one back-ticked row per index entry follows.
    An empty frame yields just the heading and a trailing newline.
    """
    width = max((len(str(name)) for name in df.index), default=0)
    rows = (
        f"`{str(name).ljust(width + 1)}with {row['total']:<2.0f} total`"
        for name, row in df.iterrows()
    )
    return heading + '\n' + '\n'.join(rows)


class MsgData:
    """In-memory cache of messages and their reactions.

    Attributes are documented with comments rather than attribute docstrings.
    """

    # Messages indexed by message id (see ``message_dict`` for the columns).
    msgs: pd.DataFrame
    # Reaction counts indexed by ('msg id', 'emoji').
    reactions: pd.DataFrame
    # Serialises mutations of ``msgs`` / ``reactions`` between coroutines.
    lock: asyncio.Lock

    @classmethod
    async def create(cls, client: discord.Client, **kwargs):
        """Build the cache by fetching channel history through *client*.

        ``kwargs`` are forwarded to ``message_df`` / ``message_gen``.
        """
        self = cls()
        self.msgs = (await message_df(client, **kwargs)).sort_values('created')
        self.reactions = await reaction_df(self.msgs['object'].tolist())
        self.lock = asyncio.Lock()
        return self

    @classmethod
    def from_sql(cls, db):
        """Load the cache from the ``msgs`` and ``reactions`` SQLite tables.

        Args:
            db: path to a database file, or an open ``sqlite3.Connection``.

        Raises:
            TypeError: if *db* is neither of the above.
        """
        self = cls()
        with _connection(db) as con:
            self.msgs = pd.read_sql('select * from msgs', con=con, index_col='id')
            self.reactions = pd.read_sql(
                'select * from reactions', con
            ).set_index(_REACTION_INDEX)
        # Element-wise parsing tolerates rows whose timestamp strings differ
        # in format (e.g. with and without fractional seconds).
        self.msgs['created'] = self.msgs['created'].apply(pd.to_datetime, utc=True)
        # Without a lock, add_msg/update_reaction fail on SQL-loaded instances.
        self.lock = asyncio.Lock()
        return self

    def to_sql(self, db):
        """Write ``msgs`` and ``reactions`` to SQLite, replacing both tables.

        Args:
            db: path to a database file, or an open ``sqlite3.Connection``.

        Raises:
            TypeError: if *db* is neither of the above.
        """
        # The live message objects cannot be stored; the column is absent
        # when the data itself came from ``from_sql``.
        storable = self.msgs.drop(columns='object', errors='ignore')
        with _connection(db) as con:
            storable.to_sql(
                name='msgs', con=con, if_exists='replace',
                index=True, index_label=self.msgs.index.name,
            )
            self.reactions.to_sql(
                name='reactions', con=con, if_exists='replace',
                index=True, index_label=self.reactions.index.name,
            )

    def __str__(self):
        return str(self.msgs) + '\n\n' + str(self.reactions)

    async def add_msg(self, message: discord.Message):
        """Insert (or overwrite) the row for *message* in ``msgs``."""
        async with self.lock:
            mdict = message_dict(message)
            mdict.pop('id')
            self.msgs.loc[message.id] = pd.Series(mdict)
            LOGGER.info(f'Added message id {message.id} from {message.author}: {message.content}')

    async def update_reaction(self, client: discord.Client, payload: RawReactionActionEvent):
        """Refresh the cached reactions of the message named in *payload*.

        The message is re-fetched, its old reaction rows are dropped and its
        current reactions (if any) are appended.
        """
        payload.emoji = convert_emoji(payload.emoji)
        # Positional ids: these parameters are positional-only in newer
        # discord.py releases and positional works in older ones too.
        chan: discord.TextChannel = await client.fetch_channel(payload.channel_id)
        msg: discord.Message = await chan.fetch_message(payload.message_id)
        # asyncio.Lock only supports ``async with``.
        async with self.lock:
            try:
                # ``drop`` returns a new frame; the result must be kept or the
                # stale rows survive and duplicate on every update.
                self.reactions = self.reactions.drop(msg.id, level=0, axis=0)
            except KeyError as e:
                # Message had no cached reactions yet.
                LOGGER.warning(e)
            if (new := await reaction_series(msg=msg)) is not None:
                self.reactions = pd.concat(
                    [self.reactions, new.set_index(_REACTION_INDEX)]
                )
                LOGGER.info(f'\n{str(new)}')

    def emoji_messages(self, emoji_name: str, days: int):
        """Return cached messages that received *emoji_name* in the window.

        Raises:
            KeyError: if the emoji does not occur in the cached reactions.
        """
        res = emoji_messages(
            msg_df=self.msgs, react_df=self.reactions,
            emoji_name=emoji_name, days=days,
        )
        if res is None:
            raise KeyError(f'No emojis found for {emoji_name}')
        return res

    def emoji_totals(self, emoji_name: str, days: int):
        """Return per-author totals for *emoji_name* over the past *days*."""
        return emoji_totals(edf=self.emoji_messages(emoji_name, days))

    def emoji_leaderboard(self, emoji_name: str, days: int):
        """Return a formatted leaderboard string for *emoji_name*."""
        df = self.emoji_totals(emoji_name, days)
        return _format_totals(df, f'{emoji_name} totals, past {days} days')

    def cancellation_leaderboard(self, days):
        """Return the leaderboard for the 'cancelled' emoji."""
        return self.emoji_leaderboard(emoji_name='cancelled', days=days)

    def worst_offsenses(self, user: str, days: int):
        """Return up to five most-'cancelled' messages whose author matches *user*.

        *user* is matched case-insensitively against ``display_name``.
        """
        cdf = self.emoji_messages('cancelled', days=days)
        # NOTE(review): ``user`` is interpreted as a regular expression by
        # ``str.contains``; confirm whether callers rely on that.
        cdf = cdf[cdf['display_name'].str.contains(user, case=False, na=False)]
        if cdf.shape[0] == 0:
            return f'No cancellations for {user} in the past {days} days'
        res = f'{user}\'s top 5 cancellations in the last {days} days:\n'
        res += '\n'.join(
            f'`{row["count"]:<2.0f}cancellations`\n{row["link"]}'
            for _, row in cdf.iloc[:5].iterrows()
        )
        return res

    def biggest_daddy(self, days, top: Optional[int] = None):
        """Return the 'daddy' leaderboard, optionally limited to *top* rows."""
        df = self.emoji_totals('daddy', days)
        if top is not None:
            df = df.iloc[:top]
        return _format_totals(df, f'Daddy totals, past {days} days')

    async def biggest_single(self, client: discord.Client, emoji: str, days: int) -> str:
        """Return a mention of the author with the highest total for *emoji*.

        Raises:
            KeyError: if the emoji is unknown or has no messages in the window.
        """
        # Compute the message frame once and derive the totals from it.
        reacted_msgs = self.emoji_messages(emoji_name=emoji, days=days)
        data = emoji_totals(edf=reacted_msgs)
        if data.empty:
            raise KeyError(f'No emojis found for {emoji}')
        username = data.index[0]
        # Look the id up by name directly: de-duplicating on the id values
        # could discard the row belonging to the winning display name.
        is_winner = reacted_msgs['display_name'] == username
        user_id = int(reacted_msgs.loc[is_winner, 'user id'].iloc[0])
        user: discord.User = await client.fetch_user(user_id)
        LOGGER.info(f'User: {user.mention}')
        return f'{user.mention} with {data.iloc[0]["total"]:.0f} over the past {int(days)} days'


def convert_emoji(emoji):
    """Make ``emoji.name`` ASCII-only, in place, and return *emoji*.

    Non-ASCII names are replaced by their ``unicode-escape`` spelling, the
    same encoding ``reaction_dict`` applies to unicode emoji.
    """
    try:
        emoji.name.encode('ascii')
    except UnicodeEncodeError:
        emoji.name = emoji.name.encode('unicode-escape').decode('ascii')
    return emoji


async def message_df(client: discord.Client, **kwargs):
    """Collect messages from ``message_gen`` into a frame indexed by id.

    Rows are sorted newest first. An empty history yields an empty frame
    with the expected columns instead of failing on the missing 'id'.
    """
    rows = [message_dict(m) async for m in message_gen(client, **kwargs)]
    frame = pd.DataFrame(rows, columns=list(_MESSAGE_COLUMNS))
    return frame.set_index('id').sort_values('created', ascending=False)


async def message_gen(client: discord.Client, limit=20, days: int = 90,
                      max_channels: Optional[int] = 5, **kwargs):
    """Yield recent messages from the client's text channels.

    Channels in the 'Archive' category are skipped; the rest are ordered by
    (category name, channel name).

    Args:
        client: connected Discord client.
        limit: per-channel message limit passed to ``channel.history``.
        days: look-back window used when no ``after`` is supplied.
        max_channels: number of channels to read (``None`` = all); the
            default keeps the previous fixed cap of five.
        **kwargs: forwarded to ``channel.history``.
    """
    channels = [
        c for c in client.get_all_channels()
        if isinstance(c, discord.TextChannel) and _category_name(c) != 'Archive'
    ]
    # Channels without a category sort first (category name '').
    channels.sort(key=lambda c: (_category_name(c), c.name))

    # Loop-invariant, so resolved once up front.
    if 'after' not in kwargs:
        kwargs['after'] = datetime.today() - timedelta(days=days)
    elif isinstance(kwargs['after'], datetime):
        # NOTE(review): tzinfo is stripped without converting; whether the
        # naive value is read as UTC or local depends on the discord.py
        # version in use, so confirm before changing.
        kwargs['after'] = kwargs['after'].replace(tzinfo=None)

    for channel in channels[:max_channels]:
        LOGGER.info(f'{_category_name(channel)} #{channel.name}')
        async for msg in channel.history(limit=limit, **kwargs):
            yield msg


def message_dict(m: discord.Message) -> Dict:
    """Flatten a message into the row stored in the messages frame."""
    created = m.created_at
    if created.tzinfo is None:
        # ``astimezone()`` would read a naive value as local time.
        # NOTE(review): assumes naive ``created_at`` is UTC, as older
        # discord.py releases document; confirm for the installed version.
        created = created.replace(tzinfo=timezone.utc)
    return {
        'object': m,
        'id': m.id,
        'created': created.astimezone(),
        'display_name': m.author.display_name,
        'user id': m.author.id,
        'message': m.content,
        'channel': m.channel.name,
        'channel link': f'<#{m.channel.id}>',
        'link': m.jump_url,
    }


async def reaction_df(msgs: Iterable[discord.Message]):
    """Build the reactions frame, indexed by ('msg id', 'emoji'), for *msgs*.

    Messages without reactions are skipped; if none has any, an empty frame
    with the same index layout is returned.
    """
    frames = [await reaction_series(msg) for msg in msgs if len(msg.reactions) > 0]
    if not frames:
        # pd.concat refuses an empty list.
        return _empty_reactions()
    return pd.concat(frames).set_index(_REACTION_INDEX)


async def reaction_series(msg: discord.Message):
    """Return one row per reaction on *msg* as a DataFrame, or ``None`` if it has none."""
    if len(msg.reactions) == 0:
        return None
    return pd.DataFrame([await reaction_dict(r) for r in msg.reactions])


async def reaction_dict(r: discord.Reaction) -> Dict:
    """Flatten a reaction into a row dict.

    Custom/partial emoji contribute their ``name`` and ``id``; plain unicode
    emoji are stored ``unicode-escape``-encoded with no id.
    """
    is_emoji = isinstance(r.emoji, (discord.Emoji, discord.PartialEmoji))
    if is_emoji:
        name, emoji_id = r.emoji.name, r.emoji.id
    else:
        name, emoji_id = r.emoji.encode('unicode-escape').decode('ascii'), None
    return {
        'msg id': r.message.id,
        'emoji': name,
        'emoji id': emoji_id,
        'count': int(r.count),
    }


def emoji_messages(msg_df, react_df, emoji_name: str, days: int = 10) -> Optional[pd.DataFrame]:
    """Return messages that received *emoji_name*, most-reacted first.

    Args:
        msg_df: messages frame indexed by message id.
        react_df: reactions frame indexed by ('msg id', 'emoji').
        emoji_name: emoji to look up in the second index level.
        days: only messages created within this many days are kept.

    Returns:
        A copy of the matching message rows with an added 'count' column, or
        ``None`` (after logging an error) when the emoji is not present.
    """
    matches = react_df.index.get_level_values(1) == emoji_name
    if not matches.any():
        LOGGER.error(f'Emoji not found in reactions DataFrame: {emoji_name}')
        return None

    reactions = react_df[matches]
    counts = reactions['count'].droplevel(1)
    # Keep only the newest row per message in case duplicates were stored.
    counts = counts[~counts.index.duplicated(keep='last')]
    # Reactions may refer to messages that were never cached; ``.loc`` would
    # raise KeyError on those ids.
    known_ids = counts.index[counts.index.isin(msg_df.index)]
    reacted_msgs = msg_df.loc[known_ids].copy()

    if reacted_msgs.shape[0] == 0:
        LOGGER.error(f'No messages found with {emoji_name} reactions')
    else:
        LOGGER.info(
            f'Found {reacted_msgs.shape[0]} messages for the leaderboard, {reactions["count"].sum():.0f} reactions total')

    reacted_msgs['count'] = counts.reindex(reacted_msgs.index).to_numpy()
    cutoff = (datetime.today() - timedelta(days=days)).astimezone()
    reacted_msgs = reacted_msgs[reacted_msgs['created'] >= cutoff]
    return reacted_msgs.sort_values('count', ascending=False)


def emoji_totals(edf: pd.DataFrame) -> pd.DataFrame:
    """Aggregate reaction counts per author.

    Args:
        edf: frame with 'display_name', 'channel' and 'count' columns, as
            returned by ``emoji_messages``.

    Returns:
        Frame indexed by display name with 'total' (summed count) and
        'max channel' (channel with the author's highest summed count),
        sorted by 'total' descending. Empty input gives an empty frame with
        those columns.
    """
    if edf.empty:
        return pd.DataFrame(
            {'total': pd.Series(dtype=int), 'max channel': pd.Series(dtype=object)},
            index=pd.Index([], name='display_name'),
        )
    # Select 'count' before summing: summing the whole frame also tries to
    # add up the datetime/object columns, which newer pandas rejects.
    totals = (
        edf.groupby('display_name')['count']
        .sum()
        .sort_values(ascending=False)
        .astype(int)
    )
    max_channels = (
        edf
        .groupby(['display_name', 'channel'])['count']
        .sum()
        .sort_values(ascending=False)
        .groupby(level=0)
        # idxmax yields a (display_name, channel) tuple; keep the channel.
        .apply(lambda per_channel: per_channel.idxmax()[1])
    )
    return pd.DataFrame({
        'total': totals,
        'max channel': max_channels,
    }).sort_values('total', ascending=False)


if __name__ == '__main__':
    client = discord.Client()
    logging.basicConfig(level=logging.INFO)

    @client.event
    async def on_ready():
        print(f'{client.user} has connected to Discord!')

    load_dotenv()
    token = os.getenv('DISCORD_TOKEN')
    if not token:
        # Fail with a clear message instead of handing None to client.run.
        raise SystemExit('DISCORD_TOKEN is not set')
    client.run(token)