From f497ba43a8112eb50913acc00846dc1b1f069699 Mon Sep 17 00:00:00 2001 From: jsl12 Date: Sun, 1 Aug 2021 16:44:23 -0500 Subject: [PATCH] broke out the cancellation tallying logic and started saving messages to a sqlite3 database --- msg.py | 164 ++++++++++++++++++++++++++++++++++++++++++++++++++++ robopage.py | 63 ++++++-------------- 2 files changed, 183 insertions(+), 44 deletions(-) create mode 100644 msg.py diff --git a/msg.py b/msg.py new file mode 100644 index 0000000..8f6e267 --- /dev/null +++ b/msg.py @@ -0,0 +1,164 @@ +import logging +import os +import sqlite3 +from datetime import datetime, timedelta +from typing import Dict, Iterable, Tuple + +import discord +import pandas as pd +from dotenv import load_dotenv + + +async def message_gen(client: discord.Client, limit=20, days: int = 90, **kwargs): + channels = client.get_all_channels() + channels = filter(lambda c: isinstance(c, discord.TextChannel), channels) + channels = filter(lambda c: c.category.name != 'Archive', channels) + for channel in channels: + print(f'{channel.category.name} #{channel.name}') + async for msg in channel.history(limit=limit, after=(datetime.today() - timedelta(days=days)), **kwargs): + yield msg + + +def message_dict(m: discord.Message) -> Dict: + return { + 'object': m, + 'id': m.id, + 'created': m.created_at.astimezone(), + 'display_name': m.author.display_name, + 'message': m.content, + 'channel': m.channel.name, + 'channel link': f'<#{m.channel.id}>', + 'link': m.jump_url, + } + + +async def message_df(client: discord.Client, **kwargs): + return pd.DataFrame( + [message_dict(m) async for m in message_gen(client, **kwargs)] + ).set_index('id').sort_values('created', ascending=False) + + +async def reaction_series(msg: discord.Message): + return pd.DataFrame( + [{ + 'msg id': msg.id, + 'emoji': r.emoji.name, + 'emoji id': r.emoji.id, + 'count': r.count, + 'users': str(list(map(lambda u: u.display_name, (u for u in await r.users().flatten())))), + } + for r in msg.reactions + if isinstance(r.emoji, discord.Emoji) + ] + ) + + +async def reaction_df(msgs: Iterable[discord.Message]): + return pd.concat([await reaction_series(msg) for msg in msgs]) + + +def add_reactions(con: sqlite3.Connection, new_reacts: pd.DataFrame, table_name: str = 'reactions'): + react_df = pd.read_sql(f'select * from {table_name}', con) + react_df = react_df.append(new_reacts, ignore_index=True) + react_df = react_df.drop_duplicates(['msg id', 'emoji id']).reset_index(drop=True) + try: + react_df.to_sql('reactions', con, if_exists='replace', index=False) + except sqlite3.InterfaceError as e: + logging.exception(e) + else: + print(f'Saved {react_df.shape[0]} reactions') + return react_df + + +def add_msgs(con: sqlite3.Connection, new_msgs: pd.DataFrame, table_name: str = 'msgs'): + msg_df = pd.read_sql(f'select * from {table_name}', con, index_col='id') + msg_df = msg_df.append(new_msgs) + msg_df['created'] = pd.to_datetime(msg_df['created'], utc=True) + msg_df = msg_df[~msg_df.index.duplicated()].sort_values('created', ascending=False) + try: + msg_df.to_sql('msgs', con, if_exists='replace', index=True, index_label=msg_df.index.name) + except sqlite3.InterfaceError as e: + logging.exception(e) + else: + print(f'Saved {msg_df.shape[0]} messages') + return msg_df + + +async def get_and_save(db_file, client: discord.Client, limit: int, days: int): + df = await message_df(client, limit=limit, days=days) + print(f'Getting users for each reaction of {df.shape[0]} messages...') + reactions = await reaction_df(df['object'].tolist()) + print('Done') + + df = df.drop('object', axis=1) + + con = sqlite3.connect(db_file) + try: + msg_df = add_msgs(con, df) + react_df = add_reactions(con, reactions) + except Exception as e: + logging.exception(e) + finally: + con.close() + return msg_df, react_df + + +def load_both(con) -> Tuple[pd.DataFrame, pd.DataFrame]: + return ( + pd.read_sql(f'select * from msgs', con, index_col='id'), + pd.read_sql(f'select * from reactions', con) + ) + + +def cancellations(msg_df, react_df, days: int = 10) -> pd.DataFrame: + cancelled = react_df[react_df['emoji'] == 'cancelled'] + cancelled_msgs = msg_df.loc[cancelled['msg id'].to_list()] + + cancelled_msgs['created'] = cancelled_msgs['created'].apply(pd.to_datetime, utc=True) + cancelled_msgs = cancelled_msgs[cancelled_msgs['created'] >= (datetime.today() - timedelta(days=days)).astimezone()] + + cancelled_msgs['count'] = cancelled.set_index('msg id').loc[cancelled_msgs.index]['count'] + cancelled_msgs = cancelled_msgs.sort_values('count', ascending=False) + + return cancelled_msgs + + +def cancelled_totals(cdf: pd.DataFrame) -> pd.DataFrame: + totals = cdf.groupby('display_name').sum()['count'].sort_values(ascending=False) + max_channels = ( + cdf + .groupby(['display_name', 'channel']) + .sum()['count'] + .sort_values(ascending=False) + .groupby(level=0) + .apply(lambda gdf: gdf.idxmax()[1]) + ) + return pd.DataFrame({ + 'total': totals, + 'max channel': max_channels, + 'worst': cdf.groupby('display_name').max()['link'] + }).sort_values('total', ascending=False) + + +def report_string(df): + width = max(list(map(lambda s: len(str(s)), df.index.values))) + return '\n'.join( + f"`{name.ljust(width + 1)}with {row['total']:<2} total`\n{row['worst']}" + for name, row in df.iterrows() + ) + + +if __name__ == '__main__': + client = discord.Client() + + logging.basicConfig(level=logging.INFO) + + + @client.event + async def on_ready(): + print(f'{client.user} has connected to Discord!') + await get_and_save('messages.db', client, limit=5000, days=90) + + + load_dotenv() + client.run(os.getenv('DISCORD_TOKEN')) diff --git a/robopage.py b/robopage.py index a8c3f95..6d669d8 100644 --- a/robopage.py +++ b/robopage.py @@ -4,31 +4,16 @@ import re import discord import nltk -import pandas as pd import stockquotes from dotenv import load_dotenv +from msg import get_and_save, cancellations, cancelled_totals, report_string + logging.basicConfig(level=logging.INFO) LIL_STINKY_ID = 704043422276780072 -def cancelled_totals(df): - totals = df.groupby('display_name')['cancelled'].sum().sort_values(ascending=False) - name_width = df['display_name'].apply(str).apply(len).max() - - df2 = df.groupby(['display_name', 'channel']).sum().reset_index().groupby('display_name').max() - df2.columns = ['channel most cancelled', 'channel cancel count'] - df2['total cancelled'] = df2.index.to_series().apply(lambda n: df[df['display_name'] == n]['cancelled'].sum()) - df2['link'] = df2.index.to_series().apply(lambda n: df[df['channel'] == df2.loc[n].iloc[0]]['channel link'].iloc[0]) - - res = 'Cancellation totals:\n' - res += '\n'.join( - f'`{total} {name.ljust(name_width)}` most in {df2.loc[name, "link"]}' for name, total in totals.iteritems() - ) - return res - - class RoboPage(discord.Client): def __init__(self, *args, **kwargs): super(RoboPage, self).__init__(*args, **kwargs) @@ -38,7 +23,10 @@ class RoboPage(discord.Client): AssJoke(), DominosJoke() ] - print() + self.db_path = 'messages.db' + + def run(self): + return super().run(os.getenv('DISCORD_TOKEN')) async def reaction_messages(self, target: str, **kwargs): for c in self.get_all_channels(): @@ -53,26 +41,6 @@ class RoboPage(discord.Client): print(r.count, m.author.display_name) yield m, r.count - - async def get_cancelled_totals(self, limit=1000): - df = pd.DataFrame( - [{ - 'display_name': m.author.display_name, - 'cancelled': count, - 'message': m.content, - 'channel': m.channel.name, - 'channel link': f'<#{m.channel.id}>', - 'link': m.jump_url - } - async for m, count in self.reaction_messages('cancelled', limit=limit)] - ) - - df.to_csv('msgs.csv', index=False) - return cancelled_totals(df) - - def run(self): - return super().run(os.getenv('DISCORD_TOKEN')) - async def handle_ready(self): channel: discord.TextChannel = discord.utils.get(self.get_all_channels(), name='robotics-facility') await channel.send(f"I'm aliiiiiive {discord.utils.get(self.emojis, name='kaylon')}") @@ -88,6 +56,11 @@ class RoboPage(discord.Client): print(f'{joke.__class__.__name__} detected:\n{message.content}\n{scan_res}') await joke.respond(message, self, scan_res) + async def get_cancelled_totals(self, limit=1000, days: int = 90): + msg_df, react_df = await get_and_save('messages.db', client=self, limit=limit, days=days) + res = cancelled_totals(cancellations(msg_df, react_df, days=days)) + return report_string(res.iloc[:5]) + class Joke: @property @@ -159,6 +132,7 @@ class DominosJoke(Joke): pattern = 'NP: {
?*}' cp = nltk.RegexpParser(pattern) + def token_list(s): return nltk.chunk.tree2conlltags( cp.parse( @@ -172,9 +146,9 @@ def assify(s): for i, (text, tag, iob) in enumerate(tag_list): if text[-3:].lower() == 'ass': try: - next_tag = tag_list[i+1][1] + next_tag = tag_list[i + 1][1] if next_tag == 'NN' or next_tag == 'NNS': - return f'ass-{tag_list[i+1][0]}' + return f'ass-{tag_list[i + 1][0]}' except IndexError as e: return @@ -184,25 +158,26 @@ def unblack(s): for i, (text, tag, iob) in enumerate(tag_list): if text.lower() == 'black': if tag.startswith('JJ') or tag.startswith('NN'): - for text, tag, iob in tag_list[i+1:]: + for text, tag, iob in tag_list[i + 1:]: if tag.startswith('NN'): return f'Or as I would say, {text.lower()}' if __name__ == '__main__': load_dotenv() - TOKEN = os.getenv('DISCORD_TOKEN') client = RoboPage() + @client.event async def on_ready(): print(f'{client.user} has connected to Discord!') - await client.handle_ready() - # print(await client.get_cancelled_totals(limit=100)) + # await client.handle_ready() + @client.event async def on_message(message: discord.Message): await client.handle_message(message) + client.run()