created MsgData, reworked cancellation calculations

This commit is contained in:
2021-08-10 20:53:31 -05:00
parent 5776684b20
commit 156715879f
2 changed files with 132 additions and 107 deletions

176
msg.py
View File

@@ -1,20 +1,85 @@
import asyncio
import logging
import os
import sqlite3
from datetime import datetime, timedelta
from typing import Dict, Iterable, Tuple
from typing import Dict, Iterable
import discord
import pandas as pd
from discord.raw_models import RawReactionActionEvent
from dotenv import load_dotenv
LOGGER = logging.getLogger(__name__)
class MsgData:
    """In-memory cache of recent messages and their custom-emoji reactions.

    Instances are built with the async factory :meth:`create`; the class
    defines no ``__init__`` of its own.
    """

    # One row per message, indexed by message id (see ``message_df``).
    msgs: pd.DataFrame
    # One row per (message id, emoji name) pair (see ``reaction_df``).
    reactions: pd.DataFrame
    # Guards mutation of ``msgs`` / ``reactions`` across coroutines.
    lock: asyncio.Lock

    @classmethod
    async def create(cls, client: discord.Client, **kwargs):
        """Fetch messages and reactions through ``client`` and return a populated instance.

        ``kwargs`` are forwarded unchanged to ``message_df``.
        """
        # Instantiate via ``cls`` so subclasses get an instance of their own type.
        self = cls()
        fetched: pd.DataFrame = await message_df(client, **kwargs)
        self.msgs = fetched.sort_values('created')
        self.reactions = await reaction_df(self.msgs['object'].tolist())
        self.lock = asyncio.Lock()
        return self

    def __str__(self):
        """Return both frames rendered as text, separated by a blank line."""
        return str(self.msgs) + '\n\n' + str(self.reactions)

    async def add_msg(self, message: discord.Message):
        """Insert (or overwrite) the row for ``message`` in ``msgs``."""
        async with self.lock:
            mdict = message_dict(message)
            # The id is the row label, so it must not also appear as a column value.
            mdict.pop('id')
            self.msgs.loc[message.id] = pd.Series(mdict)
        LOGGER.info(f'Added message id {message.id} from {message.author}: {message.content}')

    async def update_reaction(self, client: discord.Client, payload: RawReactionActionEvent):
        """Refresh the cached row for the reaction named in a raw add/remove event.

        The message is re-fetched so the stored count reflects the current
        state. If the message no longer carries a matching custom-emoji
        reaction and the event was a removal, the cached row is dropped.
        """
        if not isinstance(payload.emoji, discord.PartialEmoji):
            return

        # Ids are passed positionally: that works whether or not the installed
        # discord.py declares these parameters positional-only.
        chan: discord.TextChannel = await client.fetch_channel(payload.channel_id)
        message: discord.Message = await chan.fetch_message(payload.message_id)
        idx = (message.id, payload.emoji.name)

        for reaction in message.reactions:
            if isinstance(reaction.emoji, discord.Emoji) and reaction.emoji.name == payload.emoji.name:
                row = pd.Series(await reaction_dict(reaction))
                async with self.lock:
                    self.reactions.loc[pd.IndexSlice[idx], :] = row
                    LOGGER.info(f'Added {str(idx)}, {int(self.reactions.loc[pd.IndexSlice[idx], "count"])} total')
                break
        else:
            # only reaches here if the remove action was to take off the last reaction of that type
            if payload.event_type == 'REACTION_REMOVE':
                try:
                    async with self.lock:
                        self.reactions = self.reactions.drop(idx, axis=0)
                except KeyError:
                    LOGGER.info(f'{idx} not in index')
                else:
                    LOGGER.info(f'Dropped {idx}')

    def cancellations(self, days: int = 14):
        """Return the module-level ``cancellations`` result for the cached frames."""
        return cancellations(msg_df=self.msgs, react_df=self.reactions, days=days)

    def cancellation_totals(self, days: int = 14):
        """Return ``cancelled_totals`` applied to :meth:`cancellations` for ``days``."""
        return cancelled_totals(cdf=self.cancellations(days=days))
async def message_df(client: discord.Client, **kwargs):
    """Collect the messages yielded by ``message_gen`` into a DataFrame.

    Each message is converted with ``message_dict``; the frame is indexed by
    the ``'id'`` field and ordered newest-first on ``'created'``.
    ``kwargs`` are forwarded unchanged to ``message_gen``.
    """
    rows = []
    async for m in message_gen(client, **kwargs):
        rows.append(message_dict(m))
    frame = pd.DataFrame(rows)
    frame = frame.set_index('id')
    return frame.sort_values('created', ascending=False)
async def message_gen(client: discord.Client, limit=20, days: int = 90, **kwargs):
channels = client.get_all_channels()
channels = filter(lambda c: isinstance(c, discord.TextChannel), channels)
channels = filter(lambda c: c.category.name != 'Archive', channels)
channels = sorted(channels, key=lambda c: (c.category.name, c.name))
for channel in channels:
print(f'{channel.category.name} #{channel.name}')
LOGGER.info(f'{channel.category.name} #{channel.name}')
if 'after' not in kwargs:
kwargs['after'] = (datetime.today() - timedelta(days=days))
elif isinstance((after := kwargs.get('after', None)), datetime):
@@ -36,100 +101,44 @@ def message_dict(m: discord.Message) -> Dict:
}
async def message_df(client: discord.Client, **kwargs):
    """Build a DataFrame of messages, indexed by ``'id'`` and sorted newest-first.

    Rows come from ``message_dict`` applied to every message produced by
    ``message_gen(client, **kwargs)``.
    """
    records = [message_dict(m) async for m in message_gen(client, **kwargs)]
    indexed = pd.DataFrame(records).set_index('id')
    newest_first = indexed.sort_values('created', ascending=False)
    return newest_first
async def reaction_df(msgs: Iterable[discord.Message]):
    """Return one reaction row per (message id, emoji name) for ``msgs``.

    The per-message frames come from ``reaction_series``. The result is
    indexed by ``['msg id', 'emoji']``.

    When ``msgs`` is empty, or none of the messages yields a reaction row, an
    empty frame with the same index levels and the ``'emoji id'`` / ``'count'``
    columns is returned instead of raising.
    """
    index_cols = ['msg id', 'emoji']
    frames = [await reaction_series(msg) for msg in msgs]
    # Empty frames carry no columns; keeping them would only disturb dtypes.
    frames = [frame for frame in frames if not frame.empty]
    if not frames:
        # pd.concat([]) raises, and set_index would fail on a column-less frame.
        empty = pd.DataFrame(columns=index_cols + ['emoji id', 'count'])
        return empty.set_index(index_cols)
    return pd.concat(frames).set_index(index_cols)
async def reaction_series(msg: discord.Message):
    """Return a DataFrame with one row per custom-emoji reaction on ``msg``.

    Reactions whose emoji is not a ``discord.Emoji`` instance are skipped.
    Row contents are produced by ``reaction_dict``; the inline dict literal
    that previously duplicated that helper is gone.
    """
    rows = [
        await reaction_dict(r)
        for r in msg.reactions
        if isinstance(r.emoji, discord.Emoji)
    ]
    return pd.DataFrame(rows)
async def reaction_df(msgs: Iterable[discord.Message]):
    """Concatenate the ``reaction_series`` frame of every message in ``msgs``."""
    frames = []
    for msg in msgs:
        frames.append(await reaction_series(msg))
    return pd.concat(frames)
def add_reactions(con: sqlite3.Connection, new_reacts: pd.DataFrame, table_name: str = 'reactions'):
    """Merge ``new_reacts`` into the stored reactions table and write it back.

    Existing rows are loaded with ``load_reactions``, the new rows are
    appended, and duplicates on ``('msg id', 'emoji id')`` are dropped keeping
    the first occurrence. The merged frame replaces the table named
    ``table_name`` and is returned.

    A ``sqlite3.InterfaceError`` during the write is logged, not raised; the
    merged frame is still returned in that case.
    """
    stored = load_reactions(con, table_name)
    # DataFrame.append no longer exists in pandas 2; concat is the equivalent.
    react_df = pd.concat([stored, new_reacts], ignore_index=True)
    react_df = react_df.drop_duplicates(['msg id', 'emoji id']).reset_index(drop=True)
    try:
        # Write to the table that was read, not a hard-coded name.
        react_df.to_sql(table_name, con, if_exists='replace', index=False)
    except sqlite3.InterfaceError as e:
        logging.exception(e)
    else:
        print(f'Saved {react_df.shape[0]} reactions')
    return react_df
def add_msgs(con: sqlite3.Connection, new_msgs: pd.DataFrame, table_name: str = 'msgs'):
    """Merge ``new_msgs`` into the stored messages table and write it back.

    Existing rows are loaded with ``load_msgs``, the new rows are appended,
    ``'created'`` is normalised to UTC datetimes, rows with a repeated index
    label are dropped keeping the first, and the result is sorted
    newest-first. The merged frame replaces the table named ``table_name``
    and is returned.

    A ``sqlite3.InterfaceError`` during the write is logged, not raised; the
    merged frame is still returned in that case.
    """
    stored = load_msgs(con, table_name)
    # DataFrame.append no longer exists in pandas 2; concat is the equivalent.
    msg_df = pd.concat([stored, new_msgs])
    msg_df['created'] = pd.to_datetime(msg_df['created'], utc=True)
    msg_df = msg_df[~msg_df.index.duplicated()].sort_values('created', ascending=False)
    try:
        # Write to the table that was read, not a hard-coded name.
        msg_df.to_sql(table_name, con, if_exists='replace', index=True, index_label=msg_df.index.name)
    except sqlite3.InterfaceError as e:
        logging.exception(e)
    else:
        print(f'Saved {msg_df.shape[0]} messages')
    return msg_df
async def get_and_save(db_file, client: discord.Client, limit: int, days: int):
    """Fetch messages and reactions, merge them into the SQLite file, return both frames.

    Returns ``(msg_df, react_df)`` as produced by ``add_msgs`` and
    ``add_reactions``.

    Raises:
        Exception: any error raised while merging is logged and re-raised.
            Previously it was swallowed and the function then failed with an
            unrelated ``UnboundLocalError`` at the ``return``.
    """
    df = await message_df(client, limit=limit, days=days)
    print(f'Getting users for each reaction of {df.shape[0]} messages...')
    reactions = await reaction_df(df['object'].tolist())
    print('Done')
    # The 'object' column is dropped before the frame is written to SQL.
    df = df.drop('object', axis=1)
    con = sqlite3.connect(db_file)
    try:
        msg_df = add_msgs(con, df)
        react_df = add_reactions(con, reactions)
    except Exception as e:
        logging.exception(e)
        raise
    finally:
        con.close()
    return msg_df, react_df
def load_both(con: sqlite3.Connection) -> Tuple[pd.DataFrame, pd.DataFrame]:
    """Return ``(messages, reactions)`` loaded from ``con`` using the default table names."""
    messages = load_msgs(con)
    reactions = load_reactions(con)
    return (messages, reactions)
def load_msgs(con: sqlite3.Connection, table_name: str = 'msgs') -> pd.DataFrame:
    """Read every row of ``table_name`` into a frame indexed by ``'id'``.

    Each value of the ``'created'`` column is parsed into a UTC datetime.
    ``table_name`` is interpolated into the SQL text as-is.
    """
    query = f'select * from {table_name}'
    frame = pd.read_sql(query, con, index_col='id')
    frame['created'] = frame['created'].apply(lambda stamp: pd.to_datetime(stamp, utc=True))
    return frame
def load_reactions(con: sqlite3.Connection, table_name: str = 'reactions') -> pd.DataFrame:
    """Read every row of ``table_name`` from ``con`` into a DataFrame.

    ``table_name`` is interpolated into the SQL text as-is.
    """
    query = f'select * from {table_name}'
    return pd.read_sql(query, con)
async def reaction_dict(r: discord.Reaction) -> Dict:
    """Flatten a reaction into a plain dict: message id, emoji name, emoji id, count.

    ``count`` is coerced to ``int``.
    """
    record = {}
    record['msg id'] = r.message.id
    record['emoji'] = r.emoji.name
    record['emoji id'] = r.emoji.id
    record['count'] = int(r.count)
    # 'users': str(list(map(lambda u: u.display_name, (u for u in await r.users().flatten())))),
    return record
def cancellations(msg_df, react_df, days: int = 10) -> pd.DataFrame:
    """Return recent messages that carry a 'cancelled' reaction, most-cancelled first.

    Args:
        msg_df: messages indexed by message id, with a ``'created'`` column.
            NOTE(review): ``'created'`` is compared against a timezone-aware
            cutoff, so it presumably holds tz-aware datetimes - confirm.
        react_df: reactions indexed by (message id, emoji name), with a
            ``'count'`` column.
        days: only messages created within this many days are kept.

    Returns:
        The matching rows of ``msg_df`` (as a copy) with an added ``'count'``
        column, sorted by ``'count'`` descending. Empty when nothing matches.
    """
    # get reactions with a cancellation emoji; a boolean mask (rather than a
    # label slice) yields an empty selection instead of KeyError when there
    # are none.
    is_cancel = react_df.index.get_level_values(1) == 'cancelled'
    counts = react_df.loc[is_cancel, 'count'].droplevel(1)

    # Reactions may refer to messages that are not in msg_df; looking those
    # ids up would raise KeyError, so they are left out.
    counts = counts[counts.index.isin(msg_df.index)]

    # Copy so the new column is not written onto a slice of the caller's frame.
    cancel_msgs = msg_df.loc[counts.index.to_list()].copy()
    cancel_msgs['count'] = counts

    # filter outdated messages
    cutoff = (datetime.today() - timedelta(days=days)).astimezone()
    cancel_msgs = cancel_msgs[cancel_msgs['created'] >= cutoff]
    return cancel_msgs.sort_values('count', ascending=False)
@@ -154,7 +163,7 @@ def cancelled_totals(cdf: pd.DataFrame) -> pd.DataFrame:
def report_string(df):
    """Format one back-ticked line per row: ``<label> with <total> total``.

    Labels are left-justified to the widest index label plus one space, and
    ``row['total']`` is printed with no decimals. Returns an empty string for
    an empty frame.
    """
    labels = [str(label) for label in df.index.values]
    if not labels:
        # max() of an empty sequence raises; there is nothing to report.
        return ''
    width = max(len(label) for label in labels)
    # Exactly one format line: two adjacent f-strings would concatenate and
    # print every row twice.
    return '\n'.join(
        f"`{str(name).ljust(width + 1)}with {row['total']:<2.0f} total`"
        for name, row in df.iterrows()
    )
@@ -168,7 +177,6 @@ if __name__ == '__main__':
@client.event
async def on_ready():
print(f'{client.user} has connected to Discord!')
await get_and_save('messages.db', client, limit=5000, days=90)
load_dotenv()

View File

@@ -6,13 +6,15 @@ from threading import Lock
import discord
from dotenv import load_dotenv
import msg
from jokes import CumJoke, BlackJoke, AssJoke, DominosJoke
from msg import get_and_save, cancellations, cancelled_totals, report_string
logging.basicConfig(level=logging.INFO)
LIL_STINKY_ID = 704043422276780072
LOGGER = logging.getLogger(__name__)
class RoboPage(discord.Client):
db_path: str = 'messages.db'
@@ -31,45 +33,49 @@ class RoboPage(discord.Client):
return super().run(os.getenv('DISCORD_TOKEN'))
async def handle_ready(self):
channel: discord.TextChannel = discord.utils.get(self.get_all_channels(), name='robotics-facility')
await channel.send(f"I'm aliiiiiive {discord.utils.get(self.emojis, name='kaylon')}")
async def alive():
channel: discord.TextChannel = discord.utils.get(self.get_all_channels(), name='robotics-facility')
await channel.send(f"I'm aliiiiiive {discord.utils.get(self.emojis, name='kaylon')}")
self.data: msg.MsgData = await msg.MsgData.create(
client=self,
limit=3000,
# limit=20,
days=14,
)
LOGGER.info(str(self.data.msgs.columns))
LOGGER.info(str(self.data.reactions.columns))
async def handle_message(self, message):
await self.data.add_msg(message)
if message.author != self.user:
if 'most cancelled' in message.content:
msg: discord.Message = await message.reply('Hold please...')
await message.reply(await self.get_cancelled_totals(limit=1000, days=14))
await message.reply(self.get_cancelled_totals(days=14))
elif (m := re.search('top cancelled (?P<name>\w+)', message.content)) is not None:
if self.lock.acquire(blocking=False):
msg: discord.Message = await message.reply('Hold please...')
await message.reply(await self.top_cancellations(user=m.group('name'), limit=1000, days=14))
self.lock.release()
else:
await message.reply("I'm busy!")
async with self.data.lock:
await message.reply(self.top_cancellations(user=m.group('name'), days=14))
for joke in self.jokes:
if (scan_res := joke.scan(message)):
print(f'{joke.__class__.__name__} detected:\n{message.content}\n{scan_res}')
await joke.respond(message, self, scan_res)
def get_cancelled_totals(self, days):
    """Return a text report of the top five cancellation totals for the past ``days`` days.

    Totals come from the in-memory ``self.data``. This is a plain method and
    must not be awaited.
    """
    totals = self.data.cancellation_totals(days)
    res = f'Cancellation totals, past {days} days\n' + msg.report_string(totals.iloc[:5])
    return res
def top_cancellations(self, user: str, days: int):
    """Return a text report of ``user``'s five most-cancelled messages in the past ``days`` days.

    Rows come from ``self.data.cancellations(days)`` and are kept when their
    ``'display_name'`` contains ``user``, ignoring case. This is a plain
    method and must not be awaited.
    """
    cdf = self.data.cancellations(days)
    # Match the name literally (not as a regex) and treat missing names as
    # non-matching so the mask is always boolean.
    matches = cdf['display_name'].str.contains(user, case=False, regex=False, na=False)
    cdf = cdf[matches]
    if cdf.shape[0] > 0:
        res = f'{user}\'s top 5 cancellations in the last {days} days:\n'
        res += '\n'.join(
            f'`{row["count"]} cancellations`\n{row["link"]}'
            for _, row in cdf.iloc[:5].iterrows()
        )
    else:
        res = f'No cancellations for {user} in the past {days} days'
    return res
@@ -83,9 +89,8 @@ if __name__ == '__main__':
@client.event
async def on_ready():
print(f'{client.user} has connected to Discord!')
# await client.handle_ready()
# await get_and_save(RoboPage.db_path, client=client, limit=5000, days=7)
# msg_df, react_df = await get_and_save('messages.db', client=client, limit=5000, days=90)
await client.handle_ready()
print(client.data.cancellation_totals(14))
@client.event
@@ -93,4 +98,16 @@ if __name__ == '__main__':
await client.handle_message(message)
@client.event
async def on_raw_reaction_add(payload):
LOGGER.info(payload)
await client.data.update_reaction(payload=payload, client=client)
@client.event
async def on_raw_reaction_remove(payload):
LOGGER.info(payload)
await client.data.update_reaction(payload=payload, client=client)
client.run()