Files
kwaylon/data.py

166 lines
6.8 KiB
Python

import asyncio
import logging
import sqlite3
from datetime import datetime, timedelta
from pathlib import Path
import discord
import pandas as pd
from msg import message_df, full_reaction_df, message_dict, LOGGER, reaction_df
# Module-level logger named after this module.
# NOTE: this rebinds LOGGER, so the LOGGER imported from msg on the previous
# line is shadowed for the rest of this file.
LOGGER = logging.getLogger(__name__)
class MsgData:
msgs: pd.DataFrame
reactions: pd.DataFrame
lock: asyncio.Lock
@classmethod
async def create(cls, client: discord.Client, **kwargs):
self = MsgData()
self.lock = asyncio.Lock()
self.msgs: pd.DataFrame = await message_df(client, **kwargs)
self.msgs = self.msgs.sort_values('created')
self.reactions: pd.DataFrame = full_reaction_df(self.msgs['object'].tolist())
return self
@classmethod
def from_sql(cls, db, local_tz='US/Central'):
if isinstance(db, (str, Path)):
con = sqlite3.connect(db)
elif isinstance(db, sqlite3.Connection):
con = db
self = MsgData()
self.msgs: pd.DataFrame = pd.read_sql('select * from msgs', con=con, index_col='id')
self.msgs['created'] = pd.to_datetime(self.msgs['created']).dt.tz_convert(local_tz)
self.reactions: pd.DataFrame = pd.read_sql('select * from reactions', con).set_index(['msg id', 'emoji'])
return self
def to_sql(self, db):
if isinstance(db, (str, Path)):
con = sqlite3.connect(db)
elif isinstance(db, sqlite3.Connection):
con = db
else:
raise TypeError(f'db argument is not a valid type: {type(db)}')
self.msgs.drop('object', axis=1).to_sql(
name='msgs',
con=con,
if_exists='replace',
index=True,
index_label=self.msgs.index.name
)
self.reactions.drop('object', axis=1).to_sql(
name='reactions',
con=con,
if_exists='replace',
index=True,
index_label=self.reactions.index.name
)
def __str__(self):
return str(self.msgs) + '\n\n' + str(self.reactions)
def __repr__(self):
return f'<{__name__}.{self.__class__.__name__} with {self.msgs.shape[0]} messages and {self.reactions.shape[0]} reactions>'
def __getitem__(self, item):
if isinstance(item, str):
return self.emoji_messages(emoji_name=item).sort_values('count', ascending=False)
elif isinstance(item, int):
return self.reactions.loc[pd.IndexSlice[item, :],].fillna(0).applymap(int)
async def add_msg(self, message: discord.Message):
async with self.lock:
mdict = message_dict(message)
mdict.pop('id')
self.msgs.loc[message.id] = pd.Series(mdict)
LOGGER.info(f'Added message id {message.id} from {message.author}: {message.content}')
async def update_reaction(self, msg: discord.Message):
# Drop all the reactions for this message id, if there are any
try:
async with self.lock:
self.reactions.drop(msg.id, level=0, axis=0, inplace=True)
except KeyError as e:
pass
# If there are reactions on the message after the change
if len(msg.reactions) > 0:
new = reaction_df(msg)
async with self.lock:
# self.reactions = self.reactions.join(new, how='outer')
self.reactions = pd.concat([self.reactions, new])
LOGGER.info(str(new.droplevel(level=0, axis=0).loc[:, 'count']))
if msg.id not in self.msgs.index:
await self.add_msg(msg)
return new
def emoji_messages(self, emoji_name: str, days: int = None) -> pd.DataFrame:
"""Creates a DataFrame of the messages that have reactions with a certain emoji. Includes a 'count' column"""
counts: pd.Series = self.emoji_counts(emoji_name)
# Get the ids of messages that that have the targeted emoji
message_id_counts: pd.Index = counts.index.drop_duplicates()
# There could be a situation where a message id in message_id_counts isn't actually in the self.msgs DataFrame
# Filter to keep only the messages that have actually been captured in the self.msgs DataFrame
message_id_counts: pd.Index = message_id_counts[message_id_counts.isin(self.msgs.index.get_level_values(0))]
# If there were actually some message ids found
if message_id_counts.shape[0] > 0:
# Select the relevant messages from the self.msgs DataFrame using the filtered message ids
res: pd.DataFrame = self.msgs.loc[message_id_counts]
# Add the 'count' column
res['count'] = counts
# If necessary, filter by days into the past
if days is not None and days > 0:
res = res[res['created'] >= (datetime.today() - timedelta(days=days)).astimezone()]
# return the message DataFrame, sorted by created (sent) time
return res.sort_values('created', ascending=False)
else:
raise KeyError(f'No messages found with {emoji_name} reactions')
def emoji_counts(self, emoji_name: str) -> pd.Series:
"""Creates a Series indexed by message id and with the number of reactions with emoji_name as values"""
assert isinstance(emoji_name, str), f'emoji_name must be a string'
try:
return self.reactions.loc[pd.IndexSlice[:, emoji_name], 'count'].droplevel(1).sort_values(ascending=False)
except KeyError as e:
LOGGER.error(f' {emoji_name} not found out of {self.unique_emojis.shape[0]} unique emojis')
LOGGER.error(f'{self.reactions.index.get_level_values(1)}')
raise
@property
def unique_emojis(self) -> pd.Index:
return self.reactions.index.get_level_values(1).drop_duplicates()
def emoji_totals(self, emoji_name: str, days: int = None) -> pd.Series:
"""Creates a Series indexed by user id and with the number of reactions with emoji_name as values"""
messages = self.emoji_messages(emoji_name, days)
if messages.shape[0] > 0:
return (messages
.groupby('user id')
.apply(lambda gdf: gdf['count'].sum())
.sort_values(ascending=False))
else:
raise ValueError(f'No messages found for' + \
f' {type(emoji_name)}:{emoji_name}, {type(days)}:{days}')
# return pd.DataFrame()
async def emoji_user_counts(self, client: discord.Client, emoji_name: str, days: int = None):
"""Creates a Series indexed by user display_name with the number of reactions with emoji_name as values"""
counts: pd.Series = self.emoji_totals(emoji_name, days)
counts.index = pd.Index([(await client.fetch_user(user_id=uid)).display_name for uid in counts.index])
return counts