"""Helpers for collecting Discord messages and reactions into pandas DataFrames."""
import logging
import os
from datetime import datetime, timedelta
from typing import AsyncIterator, Dict, Iterable, Optional

import discord
import pandas as pd
import pandas.errors
from dotenv import load_dotenv

LOGGER = logging.getLogger(__name__)

# Keys produced by message_dict(); listed explicitly so that message_df() can
# build a correctly shaped (empty) frame even when no messages were fetched.
# Keep in sync with message_dict().
_MESSAGE_COLUMNS = (
    'object',
    'id',
    'created',
    'display_name',
    'user id',
    'message',
    'channel',
    'channel link',
    'link',
)

# Keys produced by reaction_dict(), and the subset used as the frame index.
_REACTION_COLUMNS = ('msg id', 'emoji', 'emoji id', 'count')
_REACTION_INDEX = ('msg id', 'emoji')


def convert_emoji(emoji):
    """Make ``emoji.name`` ASCII-only, in place, and return the emoji.

    If the name cannot be encoded as ASCII it is replaced by its
    ``unicode-escape`` representation; otherwise it is left untouched.
    """
    try:
        emoji.name.encode('ascii')
    except UnicodeEncodeError:
        emoji.name = emoji.name.encode('unicode-escape').decode('ascii')
    return emoji


async def message_df(client: discord.Client, **kwargs) -> pd.DataFrame:
    """Collect messages into a DataFrame indexed by message id.

    All keyword arguments are forwarded to :func:`message_gen`. Rows are
    sorted by ``created``, newest first. When no messages are found an empty
    frame with the same columns is returned instead of raising ``KeyError``.
    """
    rows = [message_dict(m) async for m in message_gen(client, **kwargs)]
    frame = pd.DataFrame(rows, columns=list(_MESSAGE_COLUMNS))
    return frame.set_index('id').sort_values('created', ascending=False)


def _category_name(channel) -> str:
    """Return the name of ``channel``'s category, or ``''`` if it has none."""
    category = channel.category
    return category.name if category is not None else ''


async def message_gen(
        client: discord.Client, limit=20, days: int = 90, **kwargs
) -> AsyncIterator[discord.Message]:
    """Yield messages from every non-archived text channel the client sees.

    Channels are visited in ``(category name, channel name)`` order and
    channels whose category is named ``'Archive'`` are skipped. Channels
    without a category are included and sort under an empty category name.

    :param client: connected Discord client.
    :param limit: per-channel ``limit`` passed to ``channel.history``.
    :param days: look-back window used only when ``after`` is not supplied.
    :param kwargs: forwarded to ``channel.history``. A ``datetime`` passed
        as ``after`` has its ``tzinfo`` stripped (without conversion).
    """
    # The 'after' bound is the same for every channel, so normalise it once.
    # NOTE(review): datetime.today() is naive local time and replace(tzinfo=None)
    # drops the offset without converting; how channel.history interprets naive
    # datetimes depends on the installed discord.py version - confirm.
    if 'after' not in kwargs:
        kwargs['after'] = datetime.today() - timedelta(days=days)
    elif isinstance(kwargs['after'], datetime):
        kwargs['after'] = kwargs['after'].replace(tzinfo=None)

    channels = sorted(
        (
            c for c in client.get_all_channels()
            if isinstance(c, discord.TextChannel)
            and _category_name(c) != 'Archive'
        ),
        key=lambda c: (_category_name(c), c.name),
    )
    for channel in channels:
        LOGGER.info(f'{_category_name(channel)} #{channel.name}')
        async for msg in channel.history(limit=limit, **kwargs):
            yield msg


def message_dict(m: discord.Message) -> Dict:
    """Flatten a message into the row format used by :func:`message_df`.

    The original message is kept under ``'object'`` so that callers can get
    back to it (for example to read its reactions).
    """
    return {
        'object': m,
        'id': m.id,
        # astimezone() with no argument converts to the system local zone.
        'created': m.created_at.astimezone(),
        'display_name': m.author.display_name,
        'user id': m.author.id,
        'message': m.content,
        'channel': m.channel.name,
        'channel link': f'<#{m.channel.id}>',
        'link': m.jump_url,
    }


async def reaction_df(msgs: Iterable[discord.Message]) -> pd.DataFrame:
    """Build a reactions DataFrame indexed by ``('msg id', 'emoji')``.

    Messages without reactions are skipped. If no message has any reaction
    an empty frame with the same index levels and columns is returned
    (``pd.concat`` would otherwise raise ``ValueError`` on an empty list).
    """
    frames = [await reaction_series(msg) for msg in msgs if msg.reactions]
    if not frames:
        empty = pd.DataFrame(columns=list(_REACTION_COLUMNS))
        return empty.set_index(list(_REACTION_INDEX))
    return pd.concat(frames).set_index(list(_REACTION_INDEX))


async def reaction_series(msg: discord.Message) -> Optional[pd.DataFrame]:
    """Return one row per reaction on ``msg``, or ``None`` if it has none."""
    if not msg.reactions:
        return None
    return pd.DataFrame([await reaction_dict(r) for r in msg.reactions])


async def reaction_dict(r: discord.Reaction) -> Dict:
    """Flatten a reaction into the row format used by :func:`reaction_df`.

    Custom emoji (``discord.Emoji`` / ``discord.PartialEmoji``) contribute
    their name and id. Any other emoji value is stored as its
    ``unicode-escape`` text with a ``None`` id.
    """
    emoji = r.emoji
    if isinstance(emoji, (discord.Emoji, discord.PartialEmoji)):
        emoji_name, emoji_id = emoji.name, emoji.id
    else:
        emoji_name = emoji.encode('unicode-escape').decode('ascii')
        emoji_id = None
    return {
        'msg id': r.message.id,
        'emoji': emoji_name,
        'emoji id': emoji_id,
        'count': int(r.count),
    }


def emoji_messages(msg_df, react_df, emoji_name: str, days: int = 10) -> Optional[pd.DataFrame]:
    """Return recent messages that received ``emoji_name``, most reacted first.

    :param msg_df: messages frame indexed by message id (see ``message_df``).
    :param react_df: reactions frame whose index levels are
        (message id, emoji name) (see ``reaction_df``).
    :param emoji_name: emoji to look up in level 1 of ``react_df``'s index.
    :param days: only messages created within this many days are kept.
    :return: a copy of the matching ``msg_df`` rows with an added ``'count'``
        column, sorted by count descending; ``None`` (after logging an
        error) when ``emoji_name`` does not occur in ``react_df``.
    :raises KeyError: if a reacted message id is missing from ``msg_df``.
    """
    cached_emojis = react_df.index.get_level_values(1).drop_duplicates().values
    if emoji_name not in cached_emojis:
        LOGGER.error(f'Emoji not found in reactions DataFrame: {emoji_name}')
        return None

    reactions = react_df.loc[pd.IndexSlice[:, emoji_name], :]

    # Reaction count per message id. Duplicate reaction rows would make the
    # lookup below ambiguous, so report them and keep the first occurrence
    # rather than failing later on a missing 'count' column.
    counts = reactions['count'].droplevel(1)
    duplicated = counts.index.duplicated()
    if duplicated.any():
        LOGGER.error(
            f'Duplicate reaction rows for {emoji_name}:\n{counts[duplicated]}')
        counts = counts[~duplicated]

    # Copy so that adding the 'count' column never writes into msg_df.
    reacted_msgs = msg_df.loc[counts.index.to_list()].copy()
    if reacted_msgs.shape[0] == 0:
        LOGGER.error(f'No messages found with {emoji_name} reactions')
    else:
        LOGGER.info(
            f'Found {reacted_msgs.shape[0]} messages for the leaderboard, {reactions["count"].sum():.0f} reactions total')

    # reindex() tolerates repeated message ids in msg_df's index.
    reacted_msgs['count'] = counts.reindex(reacted_msgs.index).to_numpy()

    cutoff = (datetime.today() - timedelta(days=days)).astimezone()
    reacted_msgs = reacted_msgs[reacted_msgs['created'] >= cutoff]
    return reacted_msgs.sort_values('count', ascending=False)


def emoji_totals(edf: pd.DataFrame) -> pd.DataFrame:
    """Summarise reaction counts per user.

    :param edf: frame with ``'display_name'``, ``'channel'`` and ``'count'``
        columns (for example the result of :func:`emoji_messages`).
    :return: frame indexed by ``display_name`` with the user's ``'total'``
        count and ``'max channel'`` (the channel holding most of it), sorted
        by total descending.
    """
    # Select 'count' before aggregating: summing every column would also try
    # to add up non-numeric columns such as the stored message objects.
    totals = (
        edf.groupby('display_name')['count']
        .sum()
        .sort_values(ascending=False)
        .apply(int)
    )
    per_channel = edf.groupby(['display_name', 'channel'])['count'].sum()
    # idxmax() yields a (display_name, channel) tuple; keep the channel. On a
    # tie the first channel in group order wins.
    max_channels = per_channel.groupby(level=0).apply(
        lambda user_counts: user_counts.idxmax()[1])
    return pd.DataFrame({
        'total': totals,
        'max channel': max_channels,
    }).sort_values('total', ascending=False)


if __name__ == '__main__':
    # NOTE(review): newer discord.py releases require an ``intents`` argument
    # here - confirm against the pinned version.
    client = discord.Client()
    logging.basicConfig(level=logging.INFO)

    @client.event
    async def on_ready():
        print(f'{client.user} has connected to Discord!')

    # Load DISCORD_TOKEN from a .env file, if present, before reading it.
    load_dotenv()
    client.run(os.getenv('DISCORD_TOKEN'))