broke out the cancellation tallying logic and started saving messages to a sqlite3 database

This commit is contained in:
2021-08-01 16:44:23 -05:00
parent b15a2de24e
commit f497ba43a8
2 changed files with 183 additions and 44 deletions

164
msg.py Normal file
View File

@@ -0,0 +1,164 @@
import logging
import os
import sqlite3
from datetime import datetime, timedelta
from typing import Dict, Iterable, Tuple
import discord
import pandas as pd
from dotenv import load_dotenv
async def message_gen(client: discord.Client, limit=20, days: int = 90, **kwargs):
    """Yield recent messages from every non-archived text channel.

    Args:
        client: Discord client whose visible channels are scanned.
        limit: Maximum number of messages requested per channel.
        days: Only messages created within this many days are requested.
        **kwargs: Extra keyword arguments forwarded to ``channel.history``.

    Yields:
        Every message returned by the history of each selected channel.
    """
    # One cutoff for the whole scan so all channels share the same window.
    cutoff = datetime.today() - timedelta(days=days)
    for channel in client.get_all_channels():
        if not isinstance(channel, discord.TextChannel):
            continue
        # A text channel that is not inside a category has ``category`` set
        # to None; dereferencing ``.name`` on it would raise AttributeError.
        category = channel.category
        if category is not None and category.name == 'Archive':
            continue
        category_label = category.name if category is not None else '(no category)'
        print(f'{category_label} #{channel.name}')
        async for msg in channel.history(limit=limit, after=cutoff, **kwargs):
            yield msg
def message_dict(m: discord.Message) -> Dict:
    """Flatten a Discord message into a plain dict suitable for a DataFrame row.

    Args:
        m: The message to describe.

    Returns:
        A dict holding the message object itself plus its id, creation time
        (timezone-aware, expressed in the local timezone), author display
        name, content, channel name, channel mention and jump URL.
    """
    # Imported locally because the module-level imports only bring in
    # ``datetime`` and ``timedelta``.
    from datetime import timezone

    created = m.created_at
    # ``astimezone()`` on a naive datetime assumes it is local time. The
    # discord.py 1.x docs describe ``created_at`` as naive UTC, so tag it as
    # UTC first; already-aware values pass through unchanged.
    if created.tzinfo is None:
        created = created.replace(tzinfo=timezone.utc)
    return {
        'object': m,
        'id': m.id,
        'created': created.astimezone(),
        'display_name': m.author.display_name,
        'message': m.content,
        'channel': m.channel.name,
        'channel link': f'<#{m.channel.id}>',
        'link': m.jump_url,
    }
async def message_df(client: discord.Client, **kwargs):
    """Collect the messages produced by ``message_gen`` into a DataFrame.

    Args:
        client: Discord client passed through to ``message_gen``.
        **kwargs: Forwarded unchanged to ``message_gen``.

    Returns:
        A DataFrame with one row per message, indexed by ``id`` and ordered
        from newest to oldest ``created`` value.
    """
    rows = []
    async for message in message_gen(client, **kwargs):
        rows.append(message_dict(message))
    frame = pd.DataFrame(rows)
    frame = frame.set_index('id')
    return frame.sort_values('created', ascending=False)
async def reaction_series(msg: discord.Message):
    """Build a DataFrame describing the custom-emoji reactions on one message.

    Reactions whose emoji is not a ``discord.Emoji`` instance are skipped.

    Args:
        msg: The message whose reactions are inspected.

    Returns:
        A DataFrame with one row per kept reaction: message id, emoji name,
        emoji id, reaction count, and the stringified list of the reacting
        users' display names.
    """
    rows = []
    for reaction in msg.reactions:
        if not isinstance(reaction.emoji, discord.Emoji):
            continue
        row = {
            'msg id': msg.id,
            'emoji': reaction.emoji.name,
            'emoji id': reaction.emoji.id,
            'count': reaction.count,
        }
        # Fetching the reacting users is a network call per reaction.
        reactors = await reaction.users().flatten()
        row['users'] = str([user.display_name for user in reactors])
        rows.append(row)
    return pd.DataFrame(rows)
async def reaction_df(msgs: Iterable[discord.Message]):
    """Concatenate the per-message reaction tables of ``msgs``.

    Args:
        msgs: Messages whose reactions should be collected.

    Returns:
        A single DataFrame stacking the result of ``reaction_series`` for
        every message, or an empty DataFrame when ``msgs`` yields nothing.
    """
    frames = [await reaction_series(msg) for msg in msgs]
    # ``pd.concat`` raises ValueError on an empty list, so short-circuit.
    if not frames:
        return pd.DataFrame()
    return pd.concat(frames)
def add_reactions(con: sqlite3.Connection, new_reacts: pd.DataFrame, table_name: str = 'reactions'):
    """Merge ``new_reacts`` into the stored reactions table and rewrite it.

    Rows are de-duplicated on ``('msg id', 'emoji id')``; when the same pair
    is present both in the database and in ``new_reacts`` the freshly fetched
    row wins, so reaction counts are refreshed rather than frozen at the
    first value ever saved.

    Args:
        con: Open SQLite connection.
        new_reacts: Newly collected reaction rows.
        table_name: Table that is both read from and written back to.

    Returns:
        The merged, de-duplicated DataFrame (also what was written, unless
        the write failed with ``sqlite3.InterfaceError``, which is logged).
    """
    # On a fresh database the table does not exist yet; reading it would fail.
    table_exists = con.execute(
        "select 1 from sqlite_master where type = 'table' and name = ?",
        (table_name,),
    ).fetchone() is not None
    if table_exists:
        stored = pd.read_sql(f'select * from {table_name}', con)
        react_df = pd.concat([stored, new_reacts], ignore_index=True)
    else:
        react_df = new_reacts.reset_index(drop=True)

    # A frame without any columns has nothing to de-duplicate or persist.
    if react_df.columns.empty:
        return react_df

    react_df = (
        react_df
        .drop_duplicates(['msg id', 'emoji id'], keep='last')
        .reset_index(drop=True)
    )
    try:
        # Write to the same table that was read, not a hard-coded name.
        react_df.to_sql(table_name, con, if_exists='replace', index=False)
    except sqlite3.InterfaceError as e:
        logging.exception(e)
    else:
        print(f'Saved {react_df.shape[0]} reactions')
    return react_df
def add_msgs(con: sqlite3.Connection, new_msgs: pd.DataFrame, table_name: str = 'msgs'):
    """Merge ``new_msgs`` into the stored messages table and rewrite it.

    Messages are keyed by the frame index (the message id). When an id is
    already stored, the stored row is kept. ``created`` is normalised to
    timezone-aware UTC timestamps and rows are ordered newest first.

    Args:
        con: Open SQLite connection.
        new_msgs: Newly collected messages, indexed by message id and
            carrying a ``created`` column.
        table_name: Table that is both read from and written back to.

    Returns:
        The merged DataFrame (also what was written, unless the write failed
        with ``sqlite3.InterfaceError``, which is logged).
    """
    # On a fresh database the table does not exist yet; reading it would fail.
    table_exists = con.execute(
        "select 1 from sqlite_master where type = 'table' and name = ?",
        (table_name,),
    ).fetchone() is not None
    frames = []
    if table_exists:
        frames.append(pd.read_sql(f'select * from {table_name}', con, index_col='id'))
    frames.append(new_msgs)

    # Normalise each source before combining so the merged column never mixes
    # stored text timestamps with datetime objects. ``assign`` returns a copy,
    # leaving the caller's frame untouched.
    frames = [
        frame.assign(created=pd.to_datetime(frame['created'], utc=True))
        for frame in frames
    ]
    msg_df = pd.concat(frames).rename_axis('id')
    msg_df = msg_df[~msg_df.index.duplicated()].sort_values('created', ascending=False)
    try:
        # Write to the same table that was read, not a hard-coded name; the
        # index label matches the ``index_col`` used when reading.
        msg_df.to_sql(table_name, con, if_exists='replace', index=True, index_label='id')
    except sqlite3.InterfaceError as e:
        logging.exception(e)
    else:
        print(f'Saved {msg_df.shape[0]} messages')
    return msg_df
async def get_and_save(db_file, client: discord.Client, limit: int, days: int):
    """Fetch recent messages and their reactions, then persist both to SQLite.

    Args:
        db_file: Path of the SQLite database file to open.
        client: Discord client used to fetch messages.
        limit: Per-channel message limit passed to ``message_df``.
        days: Look-back window in days passed to ``message_df``.

    Returns:
        ``(msg_df, react_df)`` as returned by ``add_msgs`` and
        ``add_reactions``.

    Raises:
        Exception: Any error raised while saving is logged and re-raised.
    """
    df = await message_df(client, limit=limit, days=days)
    print(f'Getting users for each reaction of {df.shape[0]} messages...')
    reactions = await reaction_df(df['object'].tolist())
    print('Done')
    # The live message objects are only needed to look up reactions.
    df = df.drop('object', axis=1)
    con = sqlite3.connect(db_file)
    try:
        msg_df = add_msgs(con, df)
        react_df = add_reactions(con, reactions)
    except Exception as e:
        logging.exception(e)
        # Swallowing the error here would leave msg_df/react_df unbound and
        # surface as an unrelated UnboundLocalError at the return below.
        raise
    finally:
        con.close()
    return msg_df, react_df
def load_both(con) -> Tuple[pd.DataFrame, pd.DataFrame]:
    """Read the saved messages and reactions back from the database.

    Args:
        con: Open database connection containing ``msgs`` and ``reactions``.

    Returns:
        ``(messages, reactions)`` where ``messages`` is indexed by ``id``.
    """
    messages = pd.read_sql('select * from msgs', con, index_col='id')
    reactions = pd.read_sql('select * from reactions', con)
    return messages, reactions
def cancellations(msg_df, react_df, days: int = 10) -> pd.DataFrame:
    """Return recent messages that received a ``cancelled`` reaction.

    Args:
        msg_df: Messages indexed by message id, with a ``created`` column.
        react_df: Reaction rows with ``msg id``, ``emoji`` and ``count``.
        days: Only messages created within this many days are kept.

    Returns:
        A copy of the matching rows of ``msg_df`` with ``created`` parsed to
        UTC timestamps and an added ``count`` column, sorted by ``count``
        in descending order. ``msg_df`` itself is not modified.
    """
    cancelled = react_df[react_df['emoji'] == 'cancelled']
    # One count per message, even if several rows share a message id.
    counts = cancelled.groupby('msg id', sort=False)['count'].sum()
    # Reactions can refer to messages absent from msg_df; a label lookup on
    # those ids would raise KeyError, so keep only ids that are present.
    known_ids = counts.index[counts.index.isin(msg_df.index)].tolist()
    cancelled_msgs = msg_df.loc[known_ids].copy()
    cancelled_msgs['created'] = cancelled_msgs['created'].apply(pd.to_datetime, utc=True)
    cutoff = (datetime.today() - timedelta(days=days)).astimezone()
    cancelled_msgs = cancelled_msgs[cancelled_msgs['created'] >= cutoff].copy()
    cancelled_msgs['count'] = counts.loc[cancelled_msgs.index]
    return cancelled_msgs.sort_values('count', ascending=False)
def cancelled_totals(cdf: pd.DataFrame) -> pd.DataFrame:
    """Summarise cancellations per author.

    Args:
        cdf: Cancelled messages with ``display_name``, ``channel``,
            ``count`` and ``link`` columns.

    Returns:
        A DataFrame indexed by ``display_name`` and sorted by ``total``
        descending, with columns:
        ``total`` - sum of ``count`` over the author's messages;
        ``max channel`` - channel holding the author's highest summed count;
        ``worst`` - link of the author's single highest-count message.
    """
    # Select ``count`` before aggregating so unrelated columns (text,
    # timestamps) are never summed.
    totals = cdf.groupby('display_name')['count'].sum().sort_values(ascending=False)

    per_channel = cdf.groupby(['display_name', 'channel'])['count'].sum()
    # idxmax on the (display_name, channel) index yields a tuple; element 1
    # is the channel.
    max_channels = per_channel.groupby(level=0).apply(lambda counts: counts.idxmax()[1])

    # Order by count first so the first link in each group belongs to the
    # most-cancelled message; a max over the link strings themselves would
    # only pick the lexicographically largest URL.
    worst = (
        cdf
        .sort_values('count', ascending=False, kind='mergesort')
        .groupby('display_name')['link']
        .first()
    )
    return pd.DataFrame({
        'total': totals,
        'max channel': max_channels,
        'worst': worst,
    }).sort_values('total', ascending=False)
def report_string(df):
    """Render a totals table as a text report, two lines per row.

    Each row produces a back-quoted line holding the left-padded index label
    and its ``total``, followed by a line with the row's ``worst`` value.

    Args:
        df: DataFrame with ``total`` and ``worst`` columns; the index labels
            are used as names.

    Returns:
        The report string, or an empty string when ``df`` has no rows.
    """
    # ``default=0`` keeps an empty frame from raising inside ``max``.
    width = max((len(str(label)) for label in df.index.values), default=0)
    return '\n'.join(
        # ``str(name)`` mirrors the width computation so non-string labels
        # can be padded too.
        f"`{str(name).ljust(width + 1)}with {row['total']:<2} total`\n{row['worst']}"
        for name, row in df.iterrows()
    )
if __name__ == '__main__':
    # Stand-alone entry point: connect, archive recent messages once ready.
    client = discord.Client()
    logging.basicConfig(level=logging.INFO)

    async def on_ready():
        """Announce the connection, then fetch and store recent messages."""
        print(f'{client.user} has connected to Discord!')
        await get_and_save('messages.db', client, limit=5000, days=90)

    # Same registration the decorator form performs.
    client.event(on_ready)

    # Load the .env file before the token is read from the environment.
    load_dotenv()
    token = os.getenv('DISCORD_TOKEN')
    client.run(token)

View File

@@ -4,31 +4,16 @@ import re
import discord import discord
import nltk import nltk
import pandas as pd
import stockquotes import stockquotes
from dotenv import load_dotenv from dotenv import load_dotenv
from msg import get_and_save, cancellations, cancelled_totals, report_string
logging.basicConfig(level=logging.INFO) logging.basicConfig(level=logging.INFO)
LIL_STINKY_ID = 704043422276780072 LIL_STINKY_ID = 704043422276780072
def cancelled_totals(df):
totals = df.groupby('display_name')['cancelled'].sum().sort_values(ascending=False)
name_width = df['display_name'].apply(str).apply(len).max()
df2 = df.groupby(['display_name', 'channel']).sum().reset_index().groupby('display_name').max()
df2.columns = ['channel most cancelled', 'channel cancel count']
df2['total cancelled'] = df2.index.to_series().apply(lambda n: df[df['display_name'] == n]['cancelled'].sum())
df2['link'] = df2.index.to_series().apply(lambda n: df[df['channel'] == df2.loc[n].iloc[0]]['channel link'].iloc[0])
res = 'Cancellation totals:\n'
res += '\n'.join(
f'`{total} {name.ljust(name_width)}` most in {df2.loc[name, "link"]}' for name, total in totals.iteritems()
)
return res
class RoboPage(discord.Client): class RoboPage(discord.Client):
def __init__(self, *args, **kwargs): def __init__(self, *args, **kwargs):
super(RoboPage, self).__init__(*args, **kwargs) super(RoboPage, self).__init__(*args, **kwargs)
@@ -38,7 +23,10 @@ class RoboPage(discord.Client):
AssJoke(), AssJoke(),
DominosJoke() DominosJoke()
] ]
print() self.db_path = 'messages.db'
def run(self):
    """Start the client using the token stored in ``DISCORD_TOKEN``."""
    token = os.getenv('DISCORD_TOKEN')
    return super().run(token)
async def reaction_messages(self, target: str, **kwargs): async def reaction_messages(self, target: str, **kwargs):
for c in self.get_all_channels(): for c in self.get_all_channels():
@@ -53,26 +41,6 @@ class RoboPage(discord.Client):
print(r.count, m.author.display_name) print(r.count, m.author.display_name)
yield m, r.count yield m, r.count
async def get_cancelled_totals(self, limit=1000):
df = pd.DataFrame(
[{
'display_name': m.author.display_name,
'cancelled': count,
'message': m.content,
'channel': m.channel.name,
'channel link': f'<#{m.channel.id}>',
'link': m.jump_url
}
async for m, count in self.reaction_messages('cancelled', limit=limit)]
)
df.to_csv('msgs.csv', index=False)
return cancelled_totals(df)
def run(self):
return super().run(os.getenv('DISCORD_TOKEN'))
async def handle_ready(self): async def handle_ready(self):
channel: discord.TextChannel = discord.utils.get(self.get_all_channels(), name='robotics-facility') channel: discord.TextChannel = discord.utils.get(self.get_all_channels(), name='robotics-facility')
await channel.send(f"I'm aliiiiiive {discord.utils.get(self.emojis, name='kaylon')}") await channel.send(f"I'm aliiiiiive {discord.utils.get(self.emojis, name='kaylon')}")
@@ -88,6 +56,11 @@ class RoboPage(discord.Client):
print(f'{joke.__class__.__name__} detected:\n{message.content}\n{scan_res}') print(f'{joke.__class__.__name__} detected:\n{message.content}\n{scan_res}')
await joke.respond(message, self, scan_res) await joke.respond(message, self, scan_res)
async def get_cancelled_totals(self, limit=1000, days: int = 90):
    """Refresh the message database and report the top cancellation totals.

    Args:
        limit: Per-channel message limit passed to ``get_and_save``.
        days: Look-back window in days, used both for fetching and for
            filtering cancellations.

    Returns:
        The report string for the five highest totals.
    """
    # Use the path configured on the instance instead of repeating the
    # literal file name.
    msg_df, react_df = await get_and_save(self.db_path, client=self, limit=limit, days=days)
    totals = cancelled_totals(cancellations(msg_df, react_df, days=days))
    return report_string(totals.iloc[:5])
class Joke: class Joke:
@property @property
@@ -159,6 +132,7 @@ class DominosJoke(Joke):
pattern = 'NP: {<DT>?<JJ>*<NN>}' pattern = 'NP: {<DT>?<JJ>*<NN>}'
cp = nltk.RegexpParser(pattern) cp = nltk.RegexpParser(pattern)
def token_list(s): def token_list(s):
return nltk.chunk.tree2conlltags( return nltk.chunk.tree2conlltags(
cp.parse( cp.parse(
@@ -191,18 +165,19 @@ def unblack(s):
if __name__ == '__main__': if __name__ == '__main__':
load_dotenv() load_dotenv()
TOKEN = os.getenv('DISCORD_TOKEN')
client = RoboPage() client = RoboPage()
@client.event @client.event
async def on_ready(): async def on_ready():
print(f'{client.user} has connected to Discord!') print(f'{client.user} has connected to Discord!')
await client.handle_ready() # await client.handle_ready()
# print(await client.get_cancelled_totals(limit=100))
@client.event @client.event
async def on_message(message: discord.Message): async def on_message(message: discord.Message):
await client.handle_message(message) await client.handle_message(message)
client.run() client.run()