Files
kwaylon/msg.py

176 lines
5.8 KiB
Python

import logging
import os
import sqlite3
from datetime import datetime, timedelta, timezone
from typing import Dict, Iterable, Tuple

import discord
import pandas as pd
from dotenv import load_dotenv
async def message_gen(client: discord.Client, limit=20, days: int = 90, **kwargs):
    """Yield messages from every text channel that is not in the 'Archive' category.

    Args:
        client: Discord client whose channels are scanned.
        limit: Maximum number of messages requested from each channel.
        days: Look-back window in days; only used when no ``after`` keyword
            is supplied.
        **kwargs: Forwarded unchanged to ``channel.history`` apart from
            ``after``, which is normalised as described below.

    Yields:
        Messages, one channel after another, in the order ``channel.history``
        produces them.
    """
    # Normalise the cutoff once instead of on every channel iteration.
    # history() in discord.py 1.x takes `after` as a timezone-naive datetime
    # expressed in UTC, so the default cutoff is computed in UTC (not local
    # time) and aware datetimes are converted to UTC before the tz is dropped.
    after = kwargs.get('after')
    if 'after' not in kwargs:
        kwargs['after'] = (
            datetime.now(timezone.utc).replace(tzinfo=None) - timedelta(days=days)
        )
    elif isinstance(after, datetime):
        if after.tzinfo is not None:
            after = after.astimezone(timezone.utc)
        kwargs['after'] = after.replace(tzinfo=None)

    for channel in client.get_all_channels():
        if not isinstance(channel, discord.TextChannel):
            continue
        # A channel may sit outside any category; guard against category=None
        # instead of crashing on `.name`.
        category_name = channel.category.name if channel.category else 'Uncategorized'
        if category_name == 'Archive':
            continue
        print(f'{category_name} #{channel.name}')
        async for msg in channel.history(limit=limit, **kwargs):
            yield msg
def message_dict(m: discord.Message) -> Dict:
    """Flatten a Discord message into a dict usable as one DataFrame row.

    Args:
        m: The message to describe.

    Returns:
        A dict with the message object itself (``'object'``), its id, an
        aware creation timestamp in the local timezone, the author's display
        name, the text content, the channel name, a ``<#id>`` channel mention
        and the message jump URL.
    """
    created = m.created_at
    if created.tzinfo is None:
        # discord.py 1.x reports created_at as naive UTC. A bare astimezone()
        # would interpret it as *local* time and shift the instant on any
        # host that is not running in UTC, so pin it to UTC first.
        created = created.replace(tzinfo=timezone.utc)
    return {
        'object': m,
        'id': m.id,
        'created': created.astimezone(),
        'display_name': m.author.display_name,
        'message': m.content,
        'channel': m.channel.name,
        'channel link': f'<#{m.channel.id}>',
        'link': m.jump_url,
    }
async def message_df(client: discord.Client, **kwargs):
    """Collect messages into a DataFrame indexed by message id, newest first.

    Args:
        client: Discord client handed to ``message_gen``.
        **kwargs: Forwarded to ``message_gen`` (e.g. ``limit``, ``days``).

    Returns:
        A DataFrame with one row per message as produced by ``message_dict``,
        indexed by ``'id'`` and sorted by ``'created'`` descending. When no
        messages are found the frame is empty but still has the same columns.
    """
    rows = [message_dict(m) async for m in message_gen(client, **kwargs)]
    if rows:
        frame = pd.DataFrame(rows)
    else:
        # An empty list gives a frame without columns, and set_index('id')
        # would then raise KeyError; spell out the schema for that case only.
        frame = pd.DataFrame(columns=[
            'object', 'id', 'created', 'display_name',
            'message', 'channel', 'channel link', 'link',
        ])
    return frame.set_index('id').sort_values('created', ascending=False)
async def reaction_series(msg: discord.Message):
    """Build a DataFrame describing the reactions on a single message.

    Only reactions whose emoji is a ``discord.Emoji`` instance are included;
    any other emoji representation is skipped.

    Args:
        msg: Message whose reactions are inspected.

    Returns:
        A DataFrame with one row per included reaction and the columns
        ``'msg id'``, ``'emoji'``, ``'emoji id'``, ``'count'`` and ``'users'``
        (the stringified list of reacting users' display names). The frame
        has no rows and no columns when nothing qualifies.
    """
    records = []
    for reaction in msg.reactions:
        if not isinstance(reaction.emoji, discord.Emoji):
            continue
        record = {
            'msg id': msg.id,
            'emoji': reaction.emoji.name,
            'emoji id': reaction.emoji.id,
            'count': reaction.count,
        }
        # Fetching the reacting users is a network call per reaction.
        reactors = await reaction.users().flatten()
        record['users'] = str([user.display_name for user in reactors])
        records.append(record)
    return pd.DataFrame(records)
async def reaction_df(msgs: Iterable[discord.Message]):
    """Concatenate the per-message reaction frames for a batch of messages.

    Args:
        msgs: Messages to inspect, processed sequentially.

    Returns:
        The row-wise concatenation of ``reaction_series`` for every message,
        or an empty DataFrame when ``msgs`` yields nothing.
    """
    frames = [await reaction_series(msg) for msg in msgs]
    if not frames:
        # pd.concat raises ValueError on an empty list; return the same kind
        # of empty frame reaction_series gives for a message without reactions.
        return pd.DataFrame()
    return pd.concat(frames)
def add_reactions(con: sqlite3.Connection, new_reacts: pd.DataFrame, table_name: str = 'reactions'):
    """Merge new reaction rows into the stored table and rewrite that table.

    Args:
        con: Open SQLite connection.
        new_reacts: Reaction rows to add; must carry ``'msg id'`` and
            ``'emoji id'`` columns, which together identify a row.
        table_name: Table that is both read and replaced.

    Returns:
        The merged, de-duplicated DataFrame. It is returned even when saving
        fails with ``sqlite3.InterfaceError``, which is logged, not raised.
    """
    stored = load_reactions(con, table_name)
    # DataFrame.append was removed in pandas 2.0; concat is the equivalent.
    react_df = pd.concat([stored, new_reacts], ignore_index=True)
    # Stored rows come first, so an existing (msg id, emoji id) pair wins.
    react_df = react_df.drop_duplicates(['msg id', 'emoji id']).reset_index(drop=True)
    try:
        # Write back to the table that was read rather than a fixed name.
        react_df.to_sql(table_name, con, if_exists='replace', index=False)
    except sqlite3.InterfaceError as e:
        logging.exception(e)
    else:
        print(f'Saved {react_df.shape[0]} reactions')
    return react_df
def add_msgs(con: sqlite3.Connection, new_msgs: pd.DataFrame, table_name: str = 'msgs'):
    """Merge new message rows into the stored table and rewrite that table.

    Args:
        con: Open SQLite connection.
        new_msgs: Message rows indexed by message id, with a ``'created'``
            column.
        table_name: Table that is both read and replaced.

    Returns:
        The merged DataFrame with duplicate ids removed and rows sorted by
        ``'created'`` descending. It is returned even when saving fails with
        ``sqlite3.InterfaceError``, which is logged, not raised.
    """
    stored = load_msgs(con, table_name)
    # DataFrame.append was removed in pandas 2.0; concat is the equivalent.
    msg_df = pd.concat([stored, new_msgs])
    # Stored and fresh rows may carry different timezones; unify on UTC.
    msg_df['created'] = pd.to_datetime(msg_df['created'], utc=True)
    # Stored rows come first, so an id that is already present keeps its row.
    msg_df = msg_df[~msg_df.index.duplicated()].sort_values('created', ascending=False)
    try:
        # Write back to the table that was read rather than a fixed name.
        msg_df.to_sql(table_name, con, if_exists='replace', index=True, index_label=msg_df.index.name)
    except sqlite3.InterfaceError as e:
        logging.exception(e)
    else:
        print(f'Saved {msg_df.shape[0]} messages')
    return msg_df
async def get_and_save(db_file, client: discord.Client, limit: int, days: int):
    """Download recent messages and their reactions and persist both to SQLite.

    Args:
        db_file: Path of the SQLite database file.
        client: Discord client used to fetch messages.
        limit: Per-channel message limit passed to ``message_df``.
        days: Look-back window in days passed to ``message_df``.

    Returns:
        A ``(msg_df, react_df)`` tuple with the merged contents of both tables.

    Raises:
        Exception: Whatever ``add_msgs`` or ``add_reactions`` raised, after it
            has been logged. Previously the error was swallowed and the
            function then failed with an unrelated ``UnboundLocalError`` at
            the ``return`` statement.
    """
    df = await message_df(client, limit=limit, days=days)
    print(f'Getting users for each reaction of {df.shape[0]} messages...')
    reactions = await reaction_df(df['object'].tolist())
    print('Done')
    # The live message objects are only needed to fetch reactions.
    df = df.drop('object', axis=1)
    con = sqlite3.connect(db_file)
    try:
        msg_df = add_msgs(con, df)
        react_df = add_reactions(con, reactions)
    except Exception as e:
        logging.exception(e)
        raise
    finally:
        con.close()
    return msg_df, react_df
def load_both(con: sqlite3.Connection) -> Tuple[pd.DataFrame, pd.DataFrame]:
    """Load the messages and reactions tables using their default names.

    Args:
        con: Open SQLite connection.

    Returns:
        A ``(messages, reactions)`` tuple of DataFrames.
    """
    messages = load_msgs(con)
    reactions = load_reactions(con)
    return messages, reactions
def load_msgs(con: sqlite3.Connection, table_name: str = 'msgs') -> pd.DataFrame:
    """Read the messages table into a DataFrame indexed by ``'id'``.

    Args:
        con: Open SQLite connection.
        table_name: Table to read.

    Returns:
        All rows of the table, indexed by the ``'id'`` column, with
        ``'created'`` parsed into UTC-aware timestamps.
    """
    # Quote the identifier so names that to_sql accepts (spaces, keywords)
    # can be read back; embedded double quotes are escaped by doubling.
    quoted = '"' + table_name.replace('"', '""') + '"'
    df = pd.read_sql(f'select * from {quoted}', con, index_col='id')
    # Parsed value by value on purpose: stored strings may or may not carry
    # fractional seconds, which a single inferred format would reject.
    df['created'] = df['created'].apply(pd.to_datetime, utc=True)
    return df
def load_reactions(con: sqlite3.Connection, table_name: str = 'reactions') -> pd.DataFrame:
    """Read the reactions table into a DataFrame.

    Args:
        con: Open SQLite connection.
        table_name: Table to read.

    Returns:
        All rows and columns of the table with a default integer index.
    """
    # Quote the identifier so names that to_sql accepts (spaces, keywords)
    # can be read back; embedded double quotes are escaped by doubling.
    quoted = '"' + table_name.replace('"', '""') + '"'
    return pd.read_sql(f'select * from {quoted}', con)
def cancellations(msg_df, react_df, days: int = 10) -> pd.DataFrame:
    """Return recent messages that received the 'cancelled' emoji.

    Args:
        msg_df: Messages indexed by message id, with a ``'created'`` column.
        react_df: Reactions with ``'msg id'``, ``'emoji'`` and ``'count'``
            columns.
        days: Only messages created within this many days are kept.

    Returns:
        A copy of the matching rows of ``msg_df`` with an added ``'count'``
        column, sorted by ``'count'`` descending. Reactions that point at
        messages missing from ``msg_df`` are ignored, and several
        'cancelled' rows for one message are summed.
    """
    cancelled = react_df[react_df['emoji'] == 'cancelled']
    # One total per message so the later lookups see unique ids.
    counts = cancelled.groupby('msg id')['count'].sum()
    # Label-based .loc raises KeyError for unknown ids; drop them up front.
    counts = counts[counts.index.isin(msg_df.index)]

    # Passing a plain list keeps msg_df's own index name on the result.
    # Copy so the column assignments below never write into a view of msg_df.
    cancelled_msgs = msg_df.loc[counts.index.tolist()].copy()
    cancelled_msgs['created'] = cancelled_msgs['created'].apply(pd.to_datetime, utc=True)

    cutoff = (datetime.today() - timedelta(days=days)).astimezone()
    cancelled_msgs = cancelled_msgs[cancelled_msgs['created'] >= cutoff].copy()
    cancelled_msgs['count'] = counts.reindex(cancelled_msgs.index).to_numpy()
    return cancelled_msgs.sort_values('count', ascending=False)
def cancelled_totals(cdf: pd.DataFrame) -> pd.DataFrame:
    """Summarise cancellation counts per user.

    Args:
        cdf: Cancelled messages with ``'display_name'``, ``'channel'``,
            ``'count'`` and ``'link'`` columns.

    Returns:
        A DataFrame indexed by display name and sorted by ``'total'``
        descending, with the columns ``'total'`` (sum of counts),
        ``'max channel'`` (channel with the largest summed count) and
        ``'worst'`` (the largest ``'link'`` value per user).
    """
    by_user = cdf.groupby('display_name')
    # Pick the column before aggregating: summing every column raises on
    # datetime columns in pandas >= 2.0 and does needless work otherwise.
    totals = by_user['count'].sum().sort_values(ascending=False)

    per_channel = cdf.groupby(['display_name', 'channel'])['count'].sum()
    max_channels = (
        per_channel
        .sort_values(ascending=False)
        .groupby(level=0)
        # idxmax yields a (display_name, channel) label; keep the channel.
        .apply(lambda counts: counts.idxmax()[1])
    )

    return pd.DataFrame({
        'total': totals,
        'max channel': max_channels,
        # NOTE(review): this is the lexicographically largest link, not the
        # link of the highest-count message - confirm that is what 'worst'
        # is meant to show.
        'worst': by_user['link'].max(),
    }).sort_values('total', ascending=False)
def report_string(df):
    """Format per-user totals as one back-ticked line per row.

    Args:
        df: Frame whose index holds the names and which has a ``'total'``
            column.

    Returns:
        Lines of the form ``name with N total`` wrapped in back-ticks and
        joined by newlines, with names padded to a common width. An empty
        frame gives an empty string.
    """
    # Stringify once so width and padding agree for non-string labels too.
    names = [str(name) for name in df.index]
    # default=0 keeps an empty frame from raising in max().
    width = max(map(len, names), default=0)
    # Read 'total' from its own column: iterrows() builds a mixed-dtype row
    # and can turn integer totals into floats.
    return '\n'.join(
        f"`{name.ljust(width + 1)}with {total:<2} total`"
        for name, total in zip(names, df['total'])
    )
if __name__ == '__main__':
    # Script entry point: connect to Discord and, once the client reports
    # ready, download messages and reactions into messages.db.
    client = discord.Client()
    logging.basicConfig(level=logging.INFO)

    async def on_ready():
        """Announce the connection, then fetch and store the last 90 days."""
        print(f'{client.user} has connected to Discord!')
        await get_and_save('messages.db', client, limit=5000, days=90)

    # Explicit registration; same effect as decorating with @client.event.
    client.event(on_ready)

    # The bot token is read from the environment after loading any .env file.
    load_dotenv()
    token = os.getenv('DISCORD_TOKEN')
    client.run(token)