created MsgData, reworked cancellation calculations

This commit is contained in:
2021-08-10 20:53:31 -05:00
parent 5776684b20
commit 156715879f
2 changed files with 132 additions and 107 deletions

176
msg.py
View File

@@ -1,20 +1,85 @@
import asyncio
import logging import logging
import os import os
import sqlite3
from datetime import datetime, timedelta from datetime import datetime, timedelta
from typing import Dict, Iterable, Tuple from typing import Dict, Iterable
import discord import discord
import pandas as pd import pandas as pd
from discord.raw_models import RawReactionActionEvent
from dotenv import load_dotenv from dotenv import load_dotenv
LOGGER = logging.getLogger(__name__)
class MsgData:
    """In-memory store of fetched messages and their custom-emoji reactions.

    Build instances with the async :meth:`create` factory; the class has no
    ``__init__`` of its own, so a bare ``MsgData()`` carries no data.
    """

    # Message table; ``add_msg`` inserts rows keyed by ``message.id``.
    msgs: pd.DataFrame
    # Reaction table; rows are addressed by ``(message id, emoji name)``.
    reactions: pd.DataFrame
    # Serialises writes to the two frames from concurrent event handlers.
    lock: asyncio.Lock

    @classmethod
    async def create(cls, client: discord.Client, **kwargs):
        """Fetch messages and reactions through ``client`` and return a populated store.

        ``kwargs`` are forwarded unchanged to ``message_df``.
        """
        # ``cls()`` rather than a hard-coded class name so subclasses get
        # instances of their own type.
        self = cls()
        fetched = await message_df(client, **kwargs)
        self.msgs = fetched.sort_values('created')
        # presumably the 'object' column holds the discord.Message objects
        # produced by message_dict -- confirm against message_dict.
        self.reactions = await reaction_df(self.msgs['object'].tolist())
        self.lock = asyncio.Lock()
        return self

    def __str__(self):
        return str(self.msgs) + '\n\n' + str(self.reactions)

    async def add_msg(self, message: discord.Message):
        """Insert (or overwrite) the row for ``message`` in ``self.msgs``."""
        async with self.lock:
            mdict = message_dict(message)
            # The id becomes the row label, so it must not also be a value.
            mdict.pop('id')
            self.msgs.loc[message.id] = pd.Series(mdict)
            LOGGER.info(f'Added message id {message.id} from {message.author}: {message.content}')

    async def update_reaction(self, client: discord.Client, payload: RawReactionActionEvent):
        """Refresh or drop the reaction row touched by a raw reaction event.

        The message is re-fetched so the stored count reflects the current
        state of the message rather than an incremental guess.
        """
        if not isinstance(payload.emoji, discord.PartialEmoji):
            return

        # Passed positionally: works whether or not the installed discord.py
        # declares these parameters positional-only.
        chan: discord.TextChannel = await client.fetch_channel(payload.channel_id)
        message: discord.Message = await chan.fetch_message(payload.message_id)
        idx = (message.id, payload.emoji.name)

        for reaction in message.reactions:
            if isinstance(reaction.emoji, discord.Emoji) and reaction.emoji.name == payload.emoji.name:
                reactions = pd.Series(await reaction_dict(reaction))
                async with self.lock:
                    self.reactions.loc[pd.IndexSlice[idx], :] = reactions
                    LOGGER.info(f'Added {str(idx)}, {int(self.reactions.loc[pd.IndexSlice[idx], "count"])} total')
                break
        else:
            # only reaches here if the remove action was to take off the last reaction of that type
            if payload.event_type == 'REACTION_REMOVE':
                try:
                    async with self.lock:
                        self.reactions = self.reactions.drop(idx, axis=0)
                except KeyError:
                    LOGGER.info(f'{idx} not in index')
                else:
                    LOGGER.info(f'Dropped {idx}')

    def cancellations(self, days: int = 14):
        """Delegate to the module-level ``cancellations`` using the stored frames."""
        return cancellations(msg_df=self.msgs, react_df=self.reactions, days=days)

    def cancellation_totals(self, days: int = 14):
        """Delegate to ``cancelled_totals`` on the result of :meth:`cancellations`."""
        return cancelled_totals(cdf=self.cancellations(days=days))
async def message_df(client: discord.Client, **kwargs):
    """Gather the messages yielded by ``message_gen`` into a DataFrame.

    Each message is converted with ``message_dict``; the result is indexed
    by the 'id' column and ordered newest-first by 'created'.
    ``kwargs`` are forwarded unchanged to ``message_gen``.
    """
    rows = [message_dict(m) async for m in message_gen(client, **kwargs)]
    frame = pd.DataFrame(rows)
    frame = frame.set_index('id')
    return frame.sort_values('created', ascending=False)
async def message_gen(client: discord.Client, limit=20, days: int = 90, **kwargs): async def message_gen(client: discord.Client, limit=20, days: int = 90, **kwargs):
channels = client.get_all_channels() channels = client.get_all_channels()
channels = filter(lambda c: isinstance(c, discord.TextChannel), channels) channels = filter(lambda c: isinstance(c, discord.TextChannel), channels)
channels = filter(lambda c: c.category.name != 'Archive', channels) channels = filter(lambda c: c.category.name != 'Archive', channels)
channels = sorted(channels, key=lambda c: (c.category.name, c.name))
for channel in channels: for channel in channels:
print(f'{channel.category.name} #{channel.name}') LOGGER.info(f'{channel.category.name} #{channel.name}')
if 'after' not in kwargs: if 'after' not in kwargs:
kwargs['after'] = (datetime.today() - timedelta(days=days)) kwargs['after'] = (datetime.today() - timedelta(days=days))
elif isinstance((after := kwargs.get('after', None)), datetime): elif isinstance((after := kwargs.get('after', None)), datetime):
@@ -36,100 +101,44 @@ def message_dict(m: discord.Message) -> Dict:
} }
async def message_df(client: discord.Client, **kwargs): async def reaction_df(msgs: Iterable[discord.Message]):
return pd.DataFrame( return pd.concat([await reaction_series(msg) for msg in msgs]).set_index(['msg id', 'emoji'])
[message_dict(m) async for m in message_gen(client, **kwargs)]
).set_index('id').sort_values('created', ascending=False)
async def reaction_series(msg: discord.Message):
    """Return a DataFrame with one row per ``discord.Emoji`` reaction on ``msg``.

    Rows are produced by ``reaction_dict``; reactions whose emoji is not a
    ``discord.Emoji`` instance are skipped.
    """
    rows = []
    for reaction in msg.reactions:
        if not isinstance(reaction.emoji, discord.Emoji):
            continue
        rows.append(await reaction_dict(reaction))
    return pd.DataFrame(rows)
async def reaction_df(msgs: Iterable[discord.Message]): async def reaction_dict(r: discord.Reaction) -> Dict:
return pd.concat([await reaction_series(msg) for msg in msgs]) return {
'msg id': r.message.id,
'emoji': r.emoji.name,
def add_reactions(con: sqlite3.Connection, new_reacts: pd.DataFrame, table_name: str = 'reactions'): 'emoji id': r.emoji.id,
react_df = load_reactions(con, table_name) 'count': int(r.count),
react_df = react_df.append(new_reacts, ignore_index=True) # 'users': str(list(map(lambda u: u.display_name, (u for u in await r.users().flatten())))),
react_df = react_df.drop_duplicates(['msg id', 'emoji id']).reset_index(drop=True) }
try:
react_df.to_sql('reactions', con, if_exists='replace', index=False)
except sqlite3.InterfaceError as e:
logging.exception(e)
else:
print(f'Saved {react_df.shape[0]} reactions')
return react_df
def add_msgs(con: sqlite3.Connection, new_msgs: pd.DataFrame, table_name: str = 'msgs'):
    """Merge ``new_msgs`` into the stored message table and write it back.

    The existing table is loaded with ``load_msgs``, the new rows are
    appended, 'created' is normalised to UTC datetimes, rows with a repeated
    index label are dropped (first occurrence wins) and the table is
    rewritten newest-first.

    Returns the merged DataFrame, whether or not the write succeeded.
    """
    stored = load_msgs(con, table_name)
    # pd.concat instead of DataFrame.append, which newer pandas no longer has.
    msg_df = pd.concat([stored, new_msgs])
    msg_df['created'] = pd.to_datetime(msg_df['created'], utc=True)
    msg_df = msg_df[~msg_df.index.duplicated()].sort_values('created', ascending=False)
    try:
        # Write to the table that was read; the name was previously hard-coded
        # to 'msgs', silently ignoring ``table_name``.
        msg_df.to_sql(table_name, con, if_exists='replace', index=True, index_label=msg_df.index.name)
    except sqlite3.InterfaceError as e:
        logging.exception(e)
    else:
        print(f'Saved {msg_df.shape[0]} messages')
    return msg_df
async def get_and_save(db_file, client: discord.Client, limit: int, days: int):
    """Fetch messages and reactions through ``client`` and persist them to SQLite.

    Args:
        db_file: path handed to ``sqlite3.connect``.
        client: Discord client forwarded to ``message_df``.
        limit: forwarded to ``message_df``.
        days: forwarded to ``message_df``.

    Returns:
        ``(msg_df, react_df)`` as returned by ``add_msgs`` and ``add_reactions``.

    Raises:
        Whatever ``add_msgs`` / ``add_reactions`` raise. The error is logged
        and re-raised; previously it was swallowed and the ``return`` then
        failed with ``UnboundLocalError``, hiding the real cause.
    """
    df = await message_df(client, limit=limit, days=days)
    print(f'Getting users for each reaction of {df.shape[0]} messages...')
    reactions = await reaction_df(df['object'].tolist())
    print('Done')

    # The 'object' column is dropped before the frame is written to the database.
    df = df.drop('object', axis=1)
    con = sqlite3.connect(db_file)
    try:
        msg_df = add_msgs(con, df)
        react_df = add_reactions(con, reactions)
    except Exception:
        logging.exception('Saving messages/reactions to %s failed', db_file)
        raise
    finally:
        con.close()
    return msg_df, react_df
def load_both(con: sqlite3.Connection) -> Tuple[pd.DataFrame, pd.DataFrame]:
    """Load the message table and the reaction table, in that order."""
    messages = load_msgs(con)
    reactions = load_reactions(con)
    return (messages, reactions)
def load_msgs(con: sqlite3.Connection, table_name: str = 'msgs') -> pd.DataFrame:
    """Read every row of ``table_name``, indexed by its 'id' column.

    The 'created' column is parsed element by element into UTC timestamps.
    """
    query = f'select * from {table_name}'
    frame = pd.read_sql(query, con, index_col='id')
    parsed = frame['created'].apply(pd.to_datetime, utc=True)
    frame['created'] = parsed
    return frame
def load_reactions(con: sqlite3.Connection, table_name: str = 'reactions') -> pd.DataFrame:
    """Read every row of ``table_name`` into a DataFrame with a default index."""
    query = f'select * from {table_name}'
    return pd.read_sql(query, con)
def cancellations(msg_df, react_df, days: int = 10) -> pd.DataFrame:
    """Return recent messages carrying a 'cancelled' reaction, most-reacted first.

    Args:
        msg_df: messages indexed by message id, with a timezone-aware
            'created' column.
        react_df: reactions with a two-level index (message id, emoji name)
            and a 'count' column.
        days: only messages created within this many days are kept.

    Returns:
        The matching rows of ``msg_df`` plus a 'count' column, sorted by
        'count' descending. Empty when nothing matches.
    """
    # A boolean mask on the emoji level yields an empty selection instead of
    # a KeyError when no 'cancelled' reaction has been recorded yet.
    is_cancel = react_df.index.get_level_values(1) == 'cancelled'
    counts = react_df.loc[is_cancel, 'count'].droplevel(1)

    # A reaction may point at a message that is not in msg_df; skip it
    # rather than failing the whole report.
    counts = counts[counts.index.isin(msg_df.index)]

    # Copy so that adding the column never writes through to msg_df.
    cancel_msgs = msg_df.loc[counts.index.to_list()].copy()
    cancel_msgs['count'] = counts

    # filter outdated messages
    cutoff = (datetime.today() - timedelta(days=days)).astimezone()
    cancel_msgs = cancel_msgs[cancel_msgs['created'] >= cutoff]
    return cancel_msgs.sort_values('count', ascending=False)
@@ -154,7 +163,7 @@ def cancelled_totals(cdf: pd.DataFrame) -> pd.DataFrame:
def report_string(df):
    """Format one back-ticked line per row: the index label, then its 'total'.

    Labels are left-justified to the width of the longest label plus one
    space, so the ``with N total`` parts line up. An empty frame yields an
    empty string instead of raising.
    """
    # Stringify once so width and padding agree even for non-string labels.
    labels = [str(label) for label in df.index]
    width = max(map(len, labels), default=0)
    return '\n'.join(
        f"`{label.ljust(width + 1)}with {total:<2.0f} total`"
        for label, total in zip(labels, df['total'])
    )
@@ -168,7 +177,6 @@ if __name__ == '__main__':
@client.event @client.event
async def on_ready(): async def on_ready():
print(f'{client.user} has connected to Discord!') print(f'{client.user} has connected to Discord!')
await get_and_save('messages.db', client, limit=5000, days=90)
load_dotenv() load_dotenv()

View File

@@ -6,13 +6,15 @@ from threading import Lock
import discord import discord
from dotenv import load_dotenv from dotenv import load_dotenv
import msg
from jokes import CumJoke, BlackJoke, AssJoke, DominosJoke from jokes import CumJoke, BlackJoke, AssJoke, DominosJoke
from msg import get_and_save, cancellations, cancelled_totals, report_string
logging.basicConfig(level=logging.INFO) logging.basicConfig(level=logging.INFO)
LIL_STINKY_ID = 704043422276780072 LIL_STINKY_ID = 704043422276780072
LOGGER = logging.getLogger(__name__)
class RoboPage(discord.Client): class RoboPage(discord.Client):
db_path: str = 'messages.db' db_path: str = 'messages.db'
@@ -31,45 +33,49 @@ class RoboPage(discord.Client):
return super().run(os.getenv('DISCORD_TOKEN')) return super().run(os.getenv('DISCORD_TOKEN'))
async def handle_ready(self): async def handle_ready(self):
async def alive():
channel: discord.TextChannel = discord.utils.get(self.get_all_channels(), name='robotics-facility') channel: discord.TextChannel = discord.utils.get(self.get_all_channels(), name='robotics-facility')
await channel.send(f"I'm aliiiiiive {discord.utils.get(self.emojis, name='kaylon')}") await channel.send(f"I'm aliiiiiive {discord.utils.get(self.emojis, name='kaylon')}")
self.data: msg.MsgData = await msg.MsgData.create(
client=self,
limit=3000,
# limit=20,
days=14,
)
LOGGER.info(str(self.data.msgs.columns))
LOGGER.info(str(self.data.reactions.columns))
async def handle_message(self, message):
    """Record ``message`` and answer the commands and jokes it triggers.

    Every message, including the bot's own, is added to ``self.data``.
    Messages from other authors are then checked for the 'most cancelled'
    and 'top cancelled <name>' commands and scanned by each joke in
    ``self.jokes``.
    """
    await self.data.add_msg(message)
    if message.author != self.user:
        if 'most cancelled' in message.content:
            await message.reply(self.get_cancelled_totals(days=14))
        # Raw string: '\w' in a plain literal is an invalid escape sequence.
        elif (m := re.search(r'top cancelled (?P<name>\w+)', message.content)) is not None:
            async with self.data.lock:
                await message.reply(self.top_cancellations(user=m.group('name'), days=14))

        # NOTE(review): the joke scan is assumed to sit inside the author
        # check (the indentation was lost in this view) -- confirm.
        for joke in self.jokes:
            if (scan_res := joke.scan(message)):
                print(f'{joke.__class__.__name__} detected:\n{message.content}\n{scan_res}')
                await joke.respond(message, self, scan_res)
def get_cancelled_totals(self, days):
    """Build the reply text for the five largest cancellation totals.

    Totals come from ``self.data.cancellation_totals(days)`` and are
    formatted by ``msg.report_string`` under a one-line header.
    """
    top_five = self.data.cancellation_totals(days).iloc[:5]
    header = f'Cancellation totals, past {days} days\n'
    return header + msg.report_string(top_five)
def top_cancellations(self, user: str, days: int):
    """Build the reply text listing up to five of ``user``'s cancelled messages.

    Rows of ``self.data.cancellations(days)`` are kept when 'display_name'
    contains ``user`` (case-insensitive); the first five are reported with
    their 'count' and 'link'.
    """
    cdf = self.data.cancellations(days)
    matches = cdf[cdf['display_name'].str.contains(user, case=False)]
    if matches.shape[0] <= 0:
        return f'No cancellations for {user} in the past {days} days'

    entries = [
        f'`{row["count"]} cancellations`\n{row["link"]}'
        for _, row in matches.iloc[:5].iterrows()
    ]
    heading = f'{user}\'s top 5 cancellations in the last {days} days:\n'
    return heading + '\n'.join(entries)
@@ -83,9 +89,8 @@ if __name__ == '__main__':
@client.event @client.event
async def on_ready(): async def on_ready():
print(f'{client.user} has connected to Discord!') print(f'{client.user} has connected to Discord!')
# await client.handle_ready() await client.handle_ready()
# await get_and_save(RoboPage.db_path, client=client, limit=5000, days=7) print(client.data.cancellation_totals(14))
# msg_df, react_df = await get_and_save('messages.db', client=client, limit=5000, days=90)
@client.event @client.event
@@ -93,4 +98,16 @@ if __name__ == '__main__':
await client.handle_message(message) await client.handle_message(message)
@client.event
async def on_raw_reaction_add(payload):
LOGGER.info(payload)
await client.data.update_reaction(payload=payload, client=client)
@client.event
async def on_raw_reaction_remove(payload):
LOGGER.info(payload)
await client.data.update_reaction(payload=payload, client=client)
client.run() client.run()