"""Archive Discord messages and custom-emoji reactions in SQLite.

The module walks every non-archived text channel a client can see, stores
the messages and their custom-emoji reactions in two SQLite tables, and
offers helpers that summarise reactions whose emoji is named 'cancelled'.
"""
import logging
import os
import sqlite3
from datetime import datetime, timedelta
from typing import Dict, Iterable, Tuple

import discord
import pandas as pd
from dotenv import load_dotenv

# Column layouts shared by the builders below so that empty results still
# carry the schema the persistence helpers rely on.
_MESSAGE_COLUMNS = [
    'object', 'id', 'created', 'display_name',
    'message', 'channel', 'channel link', 'link',
]
_REACTION_COLUMNS = ['msg id', 'emoji', 'emoji id', 'count', 'users']


def _table_exists(con: sqlite3.Connection, table_name: str) -> bool:
    """Return True when *table_name* is a table in the connected database."""
    row = con.execute(
        "select 1 from sqlite_master where type = 'table' and name = ?",
        (table_name,),
    ).fetchone()
    return row is not None


def _concat_frames(frames, **kwargs) -> pd.DataFrame:
    """Concatenate the non-empty frames of a non-empty list.

    When every frame is empty, a copy of the first one is returned so its
    column/index layout survives.
    """
    non_empty = [frame for frame in frames if not frame.empty]
    if not non_empty:
        return frames[0].copy()
    return pd.concat(non_empty, **kwargs)


async def message_gen(client: discord.Client, limit=20, days: int = 90, **kwargs):
    """Yield messages from every text channel outside the 'Archive' category.

    Args:
        client: Connected Discord client whose channels are scanned.
        limit: Passed to ``channel.history`` for each channel.
        days: Look-back window, used only when ``after`` is not in *kwargs*.
        **kwargs: Forwarded to ``channel.history``. A datetime ``after`` has
            its tzinfo stripped before being forwarded.
    """
    # The 'after' bound does not depend on the channel, so settle it once.
    # NOTE(review): datetime.today() is naive local time and replace() drops
    # the tzinfo without converting; confirm this matches what the installed
    # discord.py expects for `after`.
    if 'after' not in kwargs:
        kwargs['after'] = datetime.today() - timedelta(days=days)
    elif isinstance((after := kwargs.get('after', None)), datetime):
        kwargs['after'] = after.replace(tzinfo=None)

    for channel in client.get_all_channels():
        if not isinstance(channel, discord.TextChannel):
            continue
        # Channels outside any category have category None; they are scanned
        # rather than crashing on `.name`.
        category_name = channel.category.name if channel.category else None
        if category_name == 'Archive':
            continue
        print(f'{category_name} #{channel.name}')
        async for msg in channel.history(limit=limit, **kwargs):
            yield msg


def message_dict(m: discord.Message) -> Dict:
    """Flatten a message into the row layout used by the messages table.

    The original message is kept under 'object' so reactions can be fetched
    later; callers drop that column before persisting.
    """
    return {
        'object': m,
        'id': m.id,
        # NOTE(review): astimezone() treats a naive datetime as local time;
        # confirm against the created_at semantics of the discord.py in use.
        'created': m.created_at.astimezone(),
        'display_name': m.author.display_name,
        'message': m.content,
        'channel': m.channel.name,
        'channel link': f'<#{m.channel.id}>',
        'link': m.jump_url,
    }


async def message_df(client: discord.Client, **kwargs):
    """Collect messages into a frame indexed by id, newest first.

    *kwargs* are forwarded to :func:`message_gen`. With no messages the
    result is an empty frame that still has the expected columns.
    """
    rows = [message_dict(m) async for m in message_gen(client, **kwargs)]
    frame = pd.DataFrame(rows, columns=_MESSAGE_COLUMNS)
    return frame.set_index('id').sort_values('created', ascending=False)


async def reaction_series(msg: discord.Message):
    """Return one row per custom-emoji reaction on *msg*.

    Only reactions whose emoji is a ``discord.Emoji`` are included. The
    reacting users' display names are stored as the ``str()`` of a list.
    """
    rows = []
    for reaction in msg.reactions:
        if not isinstance(reaction.emoji, discord.Emoji):
            continue
        users = await reaction.users().flatten()
        rows.append({
            'msg id': msg.id,
            'emoji': reaction.emoji.name,
            'emoji id': reaction.emoji.id,
            'count': reaction.count,
            'users': str([user.display_name for user in users]),
        })
    return pd.DataFrame(rows, columns=_REACTION_COLUMNS)


async def reaction_df(msgs: Iterable[discord.Message]):
    """Stack the reaction rows of every message in *msgs* into one frame.

    An empty *msgs* yields an empty frame with the reaction columns instead
    of raising from ``pd.concat``.
    """
    frames = [await reaction_series(msg) for msg in msgs]
    if not frames:
        return pd.DataFrame(columns=_REACTION_COLUMNS)
    return _concat_frames(frames)


def add_reactions(con: sqlite3.Connection, new_reacts: pd.DataFrame,
                  table_name: str = 'reactions'):
    """Merge *new_reacts* into the stored reactions and rewrite the table.

    Rows are de-duplicated on ('msg id', 'emoji id'); the first occurrence
    wins, i.e. an already-stored row is kept over a new one. A missing table
    is treated as empty so the first run can create it.

    Returns:
        The merged frame. It is returned even if the write raised
        ``sqlite3.InterfaceError``, which is logged rather than propagated.
    """
    frames = [new_reacts]
    if _table_exists(con, table_name):
        frames.insert(0, load_reactions(con, table_name))
    react_df = _concat_frames(frames, ignore_index=True)
    react_df = react_df.drop_duplicates(['msg id', 'emoji id']).reset_index(drop=True)
    try:
        # Write back to the table that was read, not a hard-coded name.
        react_df.to_sql(table_name, con, if_exists='replace', index=False)
    except sqlite3.InterfaceError as e:
        logging.exception(e)
    else:
        print(f'Saved {react_df.shape[0]} reactions')
    return react_df


def add_msgs(con: sqlite3.Connection, new_msgs: pd.DataFrame,
             table_name: str = 'msgs'):
    """Merge *new_msgs* into the stored messages and rewrite the table.

    Rows are de-duplicated on the index; the first occurrence wins, i.e. an
    already-stored row is kept over a new one. 'created' is normalised to
    UTC and the result is sorted newest first. A missing table is treated as
    empty so the first run can create it.

    Returns:
        The merged frame. It is returned even if the write raised
        ``sqlite3.InterfaceError``, which is logged rather than propagated.
    """
    frames = [new_msgs]
    if _table_exists(con, table_name):
        frames.insert(0, load_msgs(con, table_name))
    msg_df = _concat_frames(frames)
    msg_df['created'] = pd.to_datetime(msg_df['created'], utc=True)
    msg_df = msg_df[~msg_df.index.duplicated()].sort_values('created', ascending=False)
    try:
        # Write back to the table that was read, not a hard-coded name.
        msg_df.to_sql(table_name, con, if_exists='replace', index=True,
                      index_label=msg_df.index.name)
    except sqlite3.InterfaceError as e:
        logging.exception(e)
    else:
        print(f'Saved {msg_df.shape[0]} messages')
    return msg_df


async def get_and_save(db_file, client: discord.Client, limit: int, days: int):
    """Fetch recent messages and reactions and persist them to *db_file*.

    Returns:
        ``(msg_df, react_df)`` as returned by :func:`add_msgs` and
        :func:`add_reactions`.

    Raises:
        Exception: Any failure while saving is logged and re-raised, so the
            caller sees the real error; the connection is closed either way.
    """
    df = await message_df(client, limit=limit, days=days)
    print(f'Getting users for each reaction of {df.shape[0]} messages...')
    reactions = await reaction_df(df['object'].tolist())
    print('Done')
    # The live message objects cannot be stored in SQLite.
    df = df.drop('object', axis=1)

    con = sqlite3.connect(db_file)
    try:
        msg_df = add_msgs(con, df)
        react_df = add_reactions(con, reactions)
    except Exception as e:
        logging.exception(e)
        raise
    finally:
        con.close()
    return msg_df, react_df


def load_both(con: sqlite3.Connection) -> Tuple[pd.DataFrame, pd.DataFrame]:
    """Return ``(messages, reactions)`` loaded from their default tables."""
    return (load_msgs(con), load_reactions(con))


def load_msgs(con: sqlite3.Connection, table_name: str = 'msgs') -> pd.DataFrame:
    """Load the messages table indexed by 'id' with 'created' parsed as UTC.

    *table_name* is interpolated into the SQL text, so it must come from
    trusted code.
    """
    df = pd.read_sql(f'select * from {table_name}', con, index_col='id')
    df['created'] = df['created'].apply(pd.to_datetime, utc=True)
    return df


def load_reactions(con: sqlite3.Connection, table_name: str = 'reactions') -> pd.DataFrame:
    """Load the reactions table as stored.

    *table_name* is interpolated into the SQL text, so it must come from
    trusted code.
    """
    return pd.read_sql(f'select * from {table_name}', con)


def cancellations(msg_df, react_df, days: int = 10) -> pd.DataFrame:
    """Return recent messages that received a 'cancelled' reaction.

    Args:
        msg_df: Messages indexed by message id, with a 'created' column.
        react_df: Reactions with 'msg id', 'emoji' and 'count' columns.
        days: Only messages created within this many days are kept.

    Returns:
        The matching rows of *msg_df* plus a 'count' column, sorted by count
        in descending order. Reactions that refer to messages missing from
        *msg_df* are ignored.
    """
    cancelled = react_df[react_df['emoji'] == 'cancelled']
    # Sum per message so several emojis named 'cancelled' on one message
    # give a single, unambiguous count.
    counts = cancelled.groupby('msg id')['count'].sum()

    known_ids = msg_df.index.intersection(counts.index)
    cancelled_msgs = msg_df.loc[known_ids].copy()
    cancelled_msgs['created'] = cancelled_msgs['created'].apply(pd.to_datetime, utc=True)

    cutoff = (datetime.today() - timedelta(days=days)).astimezone()
    cancelled_msgs = cancelled_msgs[cancelled_msgs['created'] >= cutoff].copy()
    cancelled_msgs['count'] = counts.reindex(cancelled_msgs.index).to_numpy()
    return cancelled_msgs.sort_values('count', ascending=False)


def cancelled_totals(cdf: pd.DataFrame) -> pd.DataFrame:
    """Summarise a :func:`cancellations` frame per author.

    Returns:
        A frame indexed by display name, sorted by 'total' descending, with:
        'total' (sum of counts), 'max channel' (the channel with the largest
        summed count) and 'worst' (the maximum 'link' value of the author).
    """
    # Select the column before aggregating; aggregating every column first
    # fails on non-numeric ones such as 'created'.
    totals = cdf.groupby('display_name')['count'].sum().sort_values(ascending=False)
    max_channels = (
        cdf
        .groupby(['display_name', 'channel'])['count']
        .sum()
        .sort_values(ascending=False)
        .groupby(level=0)
        # idxmax gives a (display_name, channel) tuple; keep the channel.
        .apply(lambda per_author: per_author.idxmax()[1])
    )
    return pd.DataFrame({
        'total': totals,
        'max channel': max_channels,
        'worst': cdf.groupby('display_name')['link'].max(),
    }).sort_values('total', ascending=False)


def report_string(df):
    """Render one backtick-quoted line per row: padded name plus its total.

    Index labels are converted with ``str()`` before padding. An empty frame
    yields an empty string.
    """
    width = max((len(str(label)) for label in df.index.values), default=0)
    return '\n'.join(
        f"`{str(name).ljust(width + 1)}with {row['total']:<2} total`"
        for name, row in df.iterrows()
    )


if __name__ == '__main__':
    client = discord.Client()
    logging.basicConfig(level=logging.INFO)

    @client.event
    async def on_ready():
        print(f'{client.user} has connected to Discord!')
        await get_and_save('messages.db', client, limit=5000, days=90)

    load_dotenv()
    token = os.getenv('DISCORD_TOKEN')
    if not token:
        # Fail with a clear message instead of passing None to client.run().
        raise SystemExit('DISCORD_TOKEN is not set')
    client.run(token)