Files
kwaylon/data.py

174 lines
6.8 KiB
Python

import asyncio
import logging
import sqlite3
from datetime import datetime, timedelta
from pathlib import Path
import discord
import pandas as pd
from discord import RawReactionActionEvent
from msg import message_df, reaction_df, message_dict, LOGGER, convert_emoji, reaction_series
# Module-level logger named after this module. NOTE: this rebinds the LOGGER
# name that the `from msg import ...` line above also brings into scope.
LOGGER = logging.getLogger(__name__)
class MsgData:
    """In-memory store of Discord messages and their reaction counts.

    Attributes:
        msgs: One row per message, indexed by message id. Methods of this
            class read the ``created``, ``user id``, ``display_name``,
            ``link`` and ``object`` columns.
        reactions: Reaction rows indexed by ``('msg id', 'emoji')`` with a
            ``count`` column.
        lock: Serialises in-place updates of ``msgs`` and ``reactions``.
    """

    msgs: pd.DataFrame
    reactions: pd.DataFrame
    lock: asyncio.Lock

    @classmethod
    async def create(cls, client: discord.Client, **kwargs):
        """Build an instance by collecting messages and their reactions.

        Args:
            client: Discord client handed to ``message_df``.
            **kwargs: Forwarded unchanged to ``message_df``.

        Returns:
            A populated instance whose messages are sorted by ``created``.
        """
        self = cls()
        self.lock = asyncio.Lock()
        async with self.lock:
            msgs = await message_df(client, **kwargs)
            self.msgs = msgs.sort_values('created')
            self.reactions = await reaction_df(self.msgs['object'].tolist())
        return self

    @staticmethod
    def _connect(db):
        """Return ``(connection, owned)`` for a database path or an open connection.

        ``owned`` is True when the connection was opened here and must be
        closed by the caller of this helper.

        Raises:
            TypeError: If ``db`` is neither a path nor a ``sqlite3.Connection``.
        """
        if isinstance(db, sqlite3.Connection):
            return db, False
        if isinstance(db, (str, Path)):
            return sqlite3.connect(db), True
        raise TypeError(f'db argument is not a valid type: {type(db)}')

    @classmethod
    def from_sql(cls, db, local_tz='US/Central'):
        """Load an instance from the ``msgs`` and ``reactions`` SQLite tables.

        Args:
            db: Path to a SQLite file, or an already open connection. A
                connection passed in by the caller is left open.
            local_tz: Time zone the ``created`` column is converted to.

        Returns:
            An instance without an ``object`` column in ``msgs`` (that column
            is not persisted by ``to_sql``).

        Raises:
            TypeError: If ``db`` is neither a path nor a ``sqlite3.Connection``.
        """
        con, owned = cls._connect(db)
        try:
            msgs = pd.read_sql('select * from msgs', con=con, index_col='id')
            reactions = pd.read_sql('select * from reactions', con)
        finally:
            if owned:
                con.close()

        self = cls()
        # add_msg/update_reaction need the lock on loaded instances as well.
        self.lock = asyncio.Lock()
        # utc=True normalises mixed UTC offsets (e.g. across DST changes) and
        # naive values, so the tz conversion below always has an aware column.
        msgs['created'] = pd.to_datetime(msgs['created'], utc=True).dt.tz_convert(local_tz)
        self.msgs = msgs
        self.reactions = reactions.set_index(['msg id', 'emoji'])
        return self

    def to_sql(self, db):
        """Write ``msgs`` and ``reactions`` to SQLite, replacing existing tables.

        Args:
            db: Path to a SQLite file, or an already open connection. A
                connection passed in by the caller is left open.

        Raises:
            TypeError: If ``db`` is neither a path nor a ``sqlite3.Connection``.
        """
        con, owned = self._connect(db)
        try:
            # The 'object' column is not persisted; it is already absent on
            # instances that came from from_sql, hence errors='ignore'.
            self.msgs.drop(columns='object', errors='ignore').to_sql(
                name='msgs',
                con=con,
                if_exists='replace',
                index=True,
                index_label=self.msgs.index.name,
            )
            # index_label is left at its default so the index level names
            # ('msg id', 'emoji') become the column names.
            self.reactions.to_sql(
                name='reactions',
                con=con,
                if_exists='replace',
                index=True,
            )
        finally:
            if owned:
                con.close()

    def __str__(self):
        return str(self.msgs) + '\n\n' + str(self.reactions)

    def __repr__(self):
        return f'<{__name__}.{self.__class__.__name__} with {self.msgs.shape[0]} messages and {self.reactions.shape[0]} reactions>'

    def __getitem__(self, item):
        """Look up by emoji name (``str``) or by message id (``int``).

        Returns:
            For a ``str``: the messages carrying that emoji, highest count
            first. For an ``int``: the reaction rows of that message with
            missing values replaced by 0 and cast to ``int``.

        Raises:
            TypeError: If ``item`` is neither ``str`` nor ``int``.
            KeyError: If the emoji or message id is unknown.
        """
        if isinstance(item, str):
            return self.emoji_messages(emoji_name=item).sort_values('count', ascending=False)
        if isinstance(item, int):
            return self.reactions.loc[pd.IndexSlice[item, :], :].fillna(0).astype(int)
        raise TypeError(f'item must be an emoji name (str) or a message id (int), not {type(item)}')

    async def add_msg(self, message: discord.Message):
        """Insert or overwrite the row for ``message`` in ``msgs``."""
        async with self.lock:
            mdict = message_dict(message)
            # The id is the row label, not a column.
            mdict.pop('id')
            self.msgs.loc[message.id] = pd.Series(mdict)
        LOGGER.info('Added message id %s from %s: %s', message.id, message.author, message.content)

    async def update_reaction(self, client: discord.Client, payload: RawReactionActionEvent):
        """Refresh the stored reactions of the message referenced by ``payload``.

        The message is fetched again, its existing reaction rows are removed
        and replaced by whatever ``reaction_series`` returns for it.
        """
        payload.emoji = convert_emoji(payload.emoji)
        # Positional arguments: works whether or not the id parameter is
        # positional-only in the installed discord library.
        chan = await client.fetch_channel(payload.channel_id)
        msg = await chan.fetch_message(payload.message_id)

        async with self.lock:
            try:
                # drop() returns a new frame; the result must be kept or the
                # stale rows would be duplicated by the concat below.
                self.reactions = self.reactions.drop(msg.id, level=0, axis=0)
            except KeyError as e:
                # No reactions were stored for this message yet.
                LOGGER.warning(e)

            if (new := await reaction_series(msg=msg)) is not None:
                new = new.set_index(['msg id', 'emoji'])
                self.reactions = pd.concat([self.reactions, new])
                LOGGER.info('\n%s', new)

    def emoji_messages(self, emoji_name: str, days: int = None) -> pd.DataFrame:
        """Return the messages that have reactions with a certain emoji.

        Args:
            emoji_name: Emoji to look up.
            days: If given, keep only messages created within this many days.

        Returns:
            Rows of ``msgs`` plus a ``count`` column, newest first.

        Raises:
            KeyError: If the emoji is unknown, or none of its messages are
                present in ``msgs``.
        """
        counts = self.emoji_counts(emoji_name)

        # ids of messages that have the targeted emoji
        count_ids = counts.index.drop_duplicates()
        # keep only the messages that have actually been captured in self.msgs
        count_ids = count_ids[count_ids.isin(self.msgs.index.get_level_values(0))]
        if count_ids.empty:
            raise KeyError(f'No messages found with {emoji_name} reactions')

        # assign() builds a new frame, so self.msgs is never written through.
        res = self.msgs.loc[count_ids].assign(count=counts)
        if days is not None:
            cutoff = (datetime.today() - timedelta(days=days)).astimezone()
            res = res[res['created'] >= cutoff]
        return res.sort_values('created', ascending=False)

    def emoji_counts(self, emoji_name: str) -> pd.Series:
        """Return reaction counts for ``emoji_name`` indexed by message id.

        The result is sorted with the highest count first.

        Raises:
            TypeError: If ``emoji_name`` is not a string.
            KeyError: If no reaction with that emoji is stored.
        """
        if not isinstance(emoji_name, str):
            raise TypeError('emoji_name must be a string')
        try:
            counts = self.reactions.loc[pd.IndexSlice[:, emoji_name], 'count']
        except KeyError:
            LOGGER.error(
                f' {emoji_name} not found out of {self.unique_emojis.shape[0]} unique emojis')
            raise
        return counts.droplevel(1).sort_values(ascending=False)

    @property
    def unique_emojis(self) -> pd.Index:
        """Distinct emoji names present in ``reactions``."""
        return self.reactions.index.get_level_values(1).drop_duplicates()

    def emoji_totals(self, emoji_name: str, days: int = None) -> pd.Series:
        """Return the summed counts for each user id, largest total first."""
        return (self
                .emoji_messages(emoji_name, days)
                .groupby('user id')['count']
                .sum()
                .sort_values(ascending=False))

    async def emoji_leaderboard(self, client: discord.Client, emoji_name: str, days: int):
        """Format per-user totals for ``emoji_name`` as a text leaderboard.

        User ids are resolved to display names through ``client``. When no
        message falls inside the ``days`` window only the header line is
        returned.
        """
        counts = self.emoji_totals(emoji_name, days)
        counts.index = pd.Index([(await client.fetch_user(uid)).display_name for uid in counts.index])

        # default=0 keeps an empty leaderboard from raising in max().
        width = max((len(str(name)) for name in counts.index), default=0)
        res = f'{emoji_name} totals, past {days} days\n'
        res += '\n'.join(
            f"`{str(name).ljust(width + 1)}with {cnt:<2.0f} total`"
            for name, cnt in counts.items()
        )
        return res

    def worst_offsenses(self, user: str, days: int):
        """Describe a user's most-reacted ``cancelled`` messages.

        Args:
            user: Case-insensitive literal substring of the display name.
            days: Look-back window in days.

        Returns:
            A text listing of up to five messages, highest count first, or a
            notice that none were found.
        """
        cdf = self.emoji_messages('cancelled', days=days)
        # regex=False: the name comes from user input and must match literally;
        # na=False: rows without a display name simply do not match.
        matches = cdf['display_name'].str.contains(user, case=False, regex=False, na=False)
        cdf = cdf[matches]

        if cdf.shape[0] > 0:
            # emoji_messages orders by creation time; "top" means by count.
            top = cdf.sort_values('count', ascending=False).iloc[:5]
            res = f'{user}\'s top 5 cancellations in the last {days} days:\n'
            res += '\n'.join(
                f'`{row["count"]:<2.0f}cancellations`\n{row["link"]}' for _, row in top.iterrows())
        else:
            res = f'No cancellations for {user} in the past {days} days'
        return res

    async def biggest_single(self, client: discord.Client, emoji: str, days: int) -> str:
        """Name the user with the highest total for ``emoji`` in the window.

        Raises:
            KeyError: If no stored message has that emoji.
            IndexError: If no such message falls inside the ``days`` window.
        """
        data = self.emoji_totals(emoji_name=emoji, days=days)
        user = await client.fetch_user(data.index[0])
        LOGGER.info('User: %s', user.mention)
        return f'{user.mention} with {data.iloc[0]:.0f} over the past {int(days)} days'