# kwaylon/data.py
import asyncio
import logging
import sqlite3
from datetime import datetime, timedelta
from pathlib import Path
import discord
import pandas as pd
from msg import message_df, full_reaction_df, message_dict, reaction_df

LOGGER = logging.getLogger(__name__)

class MsgData:
    msgs: pd.DataFrame
    reactions: pd.DataFrame
    lock: asyncio.Lock

    @classmethod
    async def create(cls, client: discord.Client, **kwargs):
        """Builds the message and reaction tables by scraping through the client."""
        self = cls()
        self.lock = asyncio.Lock()
        self.msgs: pd.DataFrame = await message_df(client, **kwargs)
        self.msgs = self.msgs.sort_values('created')
        self.reactions: pd.DataFrame = full_reaction_df(self.msgs['object'].tolist())
        return self

    @classmethod
    def from_sql(cls, db, local_tz='US/Central'):
        """Restores a MsgData instance from a SQLite database written by to_sql."""
        if isinstance(db, (str, Path)):
            con = sqlite3.connect(db)
        elif isinstance(db, sqlite3.Connection):
            con = db
        else:
            raise TypeError(f'db argument is not a valid type: {type(db)}')
        self = cls()
        self.lock = asyncio.Lock()
        self.msgs: pd.DataFrame = pd.read_sql('select * from msgs', con=con, index_col='id')
        # Timestamps round-trip through SQLite as ISO strings with UTC offsets;
        # parse as UTC first so mixed DST offsets don't yield an object column.
        self.msgs['created'] = pd.to_datetime(self.msgs['created'], utc=True).dt.tz_convert(local_tz)
        self.reactions: pd.DataFrame = pd.read_sql('select * from reactions', con).set_index(['msg id', 'emoji'])
        return self

    def to_sql(self, db):
        """Persists both tables to SQLite, dropping the live 'object' handles."""
        if isinstance(db, (str, Path)):
            con = sqlite3.connect(db)
        elif isinstance(db, sqlite3.Connection):
            con = db
        else:
            raise TypeError(f'db argument is not a valid type: {type(db)}')
        # errors='ignore' keeps this safe for instances restored via from_sql,
        # whose frames no longer carry an 'object' column
        self.msgs.drop(columns='object', errors='ignore').to_sql(
            name='msgs',
            con=con,
            if_exists='replace',
            index=True,
            index_label=self.msgs.index.name
        )
        self.reactions.drop(columns='object', errors='ignore').to_sql(
            name='reactions',
            con=con,
            if_exists='replace',
            index=True,
            index_label=list(self.reactions.index.names)  # MultiIndex: ('msg id', 'emoji')
        )
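
    # Persistence round-trip sketch (illustrative; assumes message_df accepts
    # whatever filter kwargs your setup needs):
    #
    #     data = await MsgData.create(client)
    #     data.to_sql('msgs.db')                   # write both tables to SQLite
    #     restored = MsgData.from_sql('msgs.db')   # reload without hitting the API
    #
    # The 'object' columns (live discord handles) are not persisted, so a
    # restored instance supports the query methods but not further API calls.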

    def __str__(self):
        return str(self.msgs) + '\n\n' + str(self.reactions)

    def __repr__(self):
        return (f'<{__name__}.{self.__class__.__name__} with '
                f'{self.msgs.shape[0]} messages and {self.reactions.shape[0]} reactions>')

    def __getitem__(self, item):
        # str keys select by emoji name; int keys select by message id
        if isinstance(item, str):
            return self.emoji_messages(emoji_name=item).sort_values('count', ascending=False)
        elif isinstance(item, int):
            return self.reactions.loc[pd.IndexSlice[item, :], :].fillna(0).astype(int)
        raise KeyError(item)
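
    # Indexing sketch: a str key is treated as an emoji name, an int key as a
    # message id (values below are illustrative):
    #
    #     data['cancelled']   # messages with :cancelled: reactions, highest count first
    #     data[1234567890]    # reaction counts for message id 1234567890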

    async def add_msg(self, message: discord.Message):
        async with self.lock:
            mdict = message_dict(message)
            mdict.pop('id')
            self.msgs.loc[message.id] = pd.Series(mdict)
            LOGGER.info(f'Added message id {message.id} from {message.author}: {message.content}')

    async def update_reaction(self, msg: discord.Message):
        """Rebuilds the reaction rows for one message after a reaction change."""
        new = None
        # Drop all the reactions for this message id, if there are any
        try:
            async with self.lock:
                self.reactions.drop(msg.id, level=0, axis=0, inplace=True)
        except KeyError:
            pass
        # If there are reactions on the message after the change, re-add them
        if len(msg.reactions) > 0:
            new = reaction_df(msg)
            async with self.lock:
                self.reactions = pd.concat([self.reactions, new])
            LOGGER.info(str(new.droplevel(level=0, axis=0).loc[:, 'count']))
        if msg.id not in self.msgs.index:
            await self.add_msg(msg)
        return new
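
    # Event-wiring sketch using the standard discord.py events (`data` is a
    # MsgData instance created at startup; handler names are discord.py's own):
    #
    #     @client.event
    #     async def on_message(message):
    #         await data.add_msg(message)
    #
    #     @client.event
    #     async def on_reaction_add(reaction, user):
    #         await data.update_reaction(reaction.message)
    #
    # on_reaction_remove can call update_reaction the same way, since the method
    # rebuilds this message's reaction rows from msg.reactions each time.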

    def emoji_messages(self, emoji_name: str, days: int = None) -> pd.DataFrame:
        """Creates a DataFrame of the messages that have reactions with a certain emoji. Includes a 'count' column."""
        counts: pd.Series = self.emoji_counts(emoji_name)
        # Get the ids of messages that have the targeted emoji
        message_ids: pd.Index = counts.index.drop_duplicates()
        # An id in the reaction table may not have been captured in self.msgs;
        # keep only the ids that are actually present there
        message_ids = message_ids[message_ids.isin(self.msgs.index)]
        if message_ids.shape[0] > 0:
            res: pd.DataFrame = self.msgs.loc[message_ids].copy()
            res['count'] = counts
            if days is not None and days > 0:
                res = res[res['created'] >= (datetime.today() - timedelta(days=days)).astimezone()]
            return res.sort_values('created', ascending=False)
        else:
            raise KeyError(f'No messages found with {emoji_name} reactions')

    def emoji_counts(self, emoji_name: str) -> pd.Series:
        """Creates a Series indexed by message id, with the number of reactions with emoji_name as values."""
        assert isinstance(emoji_name, str), f'emoji_name must be a string, got {type(emoji_name)}'
        try:
            return self.reactions.loc[pd.IndexSlice[:, emoji_name], 'count'].droplevel(1).sort_values(ascending=False)
        except KeyError:
            LOGGER.error(f'{emoji_name} not found out of {self.unique_emojis.shape[0]} unique emojis')
            LOGGER.error(f'{self.reactions.index.get_level_values(1)}')
            raise

    @property
    def unique_emojis(self) -> pd.Index:
        return self.reactions.index.get_level_values(1).drop_duplicates()

    def emoji_totals(self, emoji_name: str, days: int = None) -> pd.Series:
        """Creates a Series indexed by user id, with the number of reactions with emoji_name as values."""
        return (self
                .emoji_messages(emoji_name, days)
                .groupby('user id')['count']
                .sum()
                .sort_values(ascending=False))

    async def emoji_leaderboard(self, client: discord.Client, emoji_name: str, days: int):
        counts: pd.Series = self.emoji_totals(emoji_name, days)
        counts.index = pd.Index([(await client.fetch_user(user_id=uid)).display_name for uid in counts.index])
        width = max(len(str(s)) for s in counts.index.values)
        res = f'{emoji_name} totals, past {days} days\n'
        # Series.iteritems was removed in pandas 2.0; items() is the replacement
        res += '\n'.join(f"`{str(name).ljust(width + 1)}with {cnt:<2.0f} total`"
                         for name, cnt in counts.items())
        return res
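
    # Usage sketch (inside an async command or event handler; the emoji name
    # and day window are illustrative):
    #
    #     text = await data.emoji_leaderboard(client, 'cancelled', days=30)
    #     await channel.send(text)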

    def worst_offenses(self, user: str, days: int):
        cdf = self.emoji_messages('cancelled', days=days)
        cdf = cdf[cdf['display_name'].str.contains(user, case=False)]
        if cdf.shape[0] > 0:
            res = f'{user}\'s top 5 cancellations in the last {days} days:\n'
            res += '\n'.join(
                f'`{row["count"]:<2.0f}cancellations`\n{row["link"]}' for idx, row in cdf.iloc[:5].iterrows())
        else:
            res = f'No cancellations for {user} in the past {days} days'
        return res

    async def biggest_single(self, client: discord.Client, emoji: str, days: int) -> str:
        data: pd.Series = self.emoji_totals(emoji_name=emoji, days=days)
        user: discord.User = await client.fetch_user(user_id=data.index[0])
        LOGGER.info(f'User: {user.mention}')
        return f'{user.mention} with {data.iloc[0]:.0f} over the past {int(days)} days'
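

# Minimal end-to-end sketch (assumes a bot token in a DISCORD_TOKEN environment
# variable; everything below is illustrative, not part of this module):
#
#     import os
#     client = discord.Client(intents=discord.Intents.default())
#
#     @client.event
#     async def on_ready():
#         data = await MsgData.create(client)
#         data.to_sql('msgs.db')
#         print(repr(data))
#
#     client.run(os.environ['DISCORD_TOKEN'])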