big rework of emoji calculations, much simpler and more efficient now

This commit is contained in:
2021-08-13 19:31:22 -05:00
parent 8ef4de693a
commit 108a51b9fe
3 changed files with 92 additions and 91 deletions

85
data.py
View File

@@ -1,14 +1,14 @@
import asyncio
import logging
import sqlite3
from datetime import datetime, timedelta
from pathlib import Path
import discord
import pandas as pd
from discord import RawReactionActionEvent
from msg import message_df, reaction_df, message_dict, LOGGER, convert_emoji, reaction_series, emoji_messages, \
emoji_totals
from msg import message_df, reaction_df, message_dict, LOGGER, convert_emoji, reaction_series
LOGGER = logging.getLogger(__name__)
@@ -21,14 +21,15 @@ class MsgData:
@classmethod
async def create(cls, client: discord.Client, **kwargs):
    """Build an instance by downloading messages and their reactions.

    :param client: Discord client handed to ``message_df`` to fetch messages.
    :param kwargs: forwarded unchanged to ``message_df``.
    :return: a new instance with ``lock``, ``msgs`` (sorted by ``'created'``)
        and ``reactions`` populated.
    """
    # Instantiate through cls so a subclass calling create() gets its own type.
    self = cls()
    # The lock has to exist before loading starts: the whole load runs under it.
    self.lock = asyncio.Lock()
    async with self.lock:
        self.msgs: pd.DataFrame = await message_df(client, **kwargs)
        self.msgs = self.msgs.sort_values('created')
        self.reactions: pd.DataFrame = await reaction_df(self.msgs['object'].tolist())
    return self
@classmethod
def from_sql(cls, db):
def from_sql(cls, db, local_tz='US/Central'):
if isinstance(db, (str, Path)):
con = sqlite3.connect(db)
elif isinstance(db, sqlite3.Connection):
@@ -36,7 +37,7 @@ class MsgData:
self = MsgData()
self.msgs: pd.DataFrame = pd.read_sql('select * from msgs', con=con, index_col='id')
self.msgs['created'] = self.msgs['created'].apply(pd.to_datetime, utc=True)
self.msgs['created'] = pd.to_datetime(self.msgs['created']).dt.tz_convert(local_tz)
self.reactions: pd.DataFrame = pd.read_sql('select * from reactions', con).set_index(['msg id', 'emoji'])
return self
@@ -45,6 +46,8 @@ class MsgData:
con = sqlite3.connect(db)
elif isinstance(db, sqlite3.Connection):
con = db
else:
raise TypeError(f'db argument is not a valid type: {type(db)}')
self.msgs.drop('object', axis=1).to_sql(
name='msgs',
@@ -69,7 +72,7 @@ class MsgData:
def __getitem__(self, item):
if isinstance(item, str):
return self.reactions.loc[pd.IndexSlice[:, item],].fillna(0).applymap(int)
return self.emoji_messages(emoji_name=item).sort_values('count', ascending=False)
elif isinstance(item, int):
return self.reactions.loc[pd.IndexSlice[item, :],].fillna(0).applymap(int)
@@ -96,29 +99,60 @@ class MsgData:
self.reactions = pd.concat([self.reactions, new])
LOGGER.info(f'\n{str(new)}')
def emoji_messages(self, emoji_name: str, days: int):
res = emoji_messages(msg_df=self.msgs, react_df=self.reactions, emoji_name=emoji_name, days=days)
if res is None:
raise KeyError(f'No emojis found for {emoji_name}')
def emoji_messages(self, emoji_name: str, days: int = None) -> pd.DataFrame:
    """Return the captured messages that received ``emoji_name`` reactions.

    The result is the matching rows of ``self.msgs`` plus a ``'count'`` column
    holding the per-message reaction count, sorted by ``'created'`` (newest first).

    :param emoji_name: emoji to look up in ``self.reactions``.
    :param days: when given, keep only messages created within the past
        ``days`` days; ``None`` keeps everything.
    :raises KeyError: if the emoji is unknown (raised by ``emoji_counts``) or
        none of the reacted messages are present in ``self.msgs``.
    """
    counts = self.emoji_counts(emoji_name)
    # ids of the messages that carry the targeted emoji
    msg_ids = counts.index.drop_duplicates()
    # keep only the messages that were actually captured in self.msgs
    msg_ids = msg_ids[msg_ids.isin(self.msgs.index.get_level_values(0))]
    if msg_ids.empty:
        raise KeyError(f'No messages found with {emoji_name} reactions')

    # Explicit copy: the 'count' column is added to this frame only, never to
    # self.msgs, and pandas has no reason to emit a SettingWithCopyWarning.
    res = self.msgs.loc[msg_ids].copy()
    res['count'] = counts  # aligned on the message id index
    if days is not None:
        cutoff = (datetime.today() - timedelta(days=days)).astimezone()
        res = res[res['created'] >= cutoff]
    return res.sort_values('created', ascending=False)
def emoji_totals(self, emoji_name: str, days: int):
return emoji_totals(edf=self.emoji_messages(emoji_name, days))
def emoji_counts(self, emoji_name: str) -> pd.Series:
    """Return the per-message reaction counts for one emoji.

    :param emoji_name: value to select on the second level of the
        ``self.reactions`` index.
    :return: the ``'count'`` column for that emoji, indexed by the first index
        level only and sorted from highest to lowest.
    :raises TypeError: if ``emoji_name`` is not a string.
    :raises KeyError: if the lookup in ``self.reactions`` fails; the error is
        logged and then re-raised unchanged.
    """
    # Explicit raise instead of assert: asserts disappear under `python -O`.
    if not isinstance(emoji_name, str):
        raise TypeError('emoji_name must be a string')
    try:
        per_message = self.reactions.loc[pd.IndexSlice[:, emoji_name], 'count']
    except KeyError:
        LOGGER.error(
            f' {emoji_name} not found out of {self.unique_emojis.shape[0]} unique emojis')
        raise
    # Drop the emoji level so the result is keyed by the first level alone.
    return per_message.droplevel(1).sort_values(ascending=False)
def emoji_leaderboard(self, emoji_name: str, days: int):
df = self.emoji_totals(emoji_name, days)
width = max(list(map(lambda s: len(str(s)), df.index.values)))
@property
def unique_emojis(self) -> pd.Index:
    """Distinct values of the second level of the ``self.reactions`` index."""
    emoji_level = self.reactions.index.get_level_values(1)
    return emoji_level.drop_duplicates()
def emoji_totals(self, emoji_name: str, days: int = None) -> pd.Series:
    """Sum the ``emoji_name`` reaction counts per ``'user id'``.

    :param emoji_name: emoji passed through to ``emoji_messages``.
    :param days: optional look-back window passed through to ``emoji_messages``.
    :return: Series indexed by ``'user id'`` holding the summed ``'count'``,
        sorted from highest to lowest; empty when no message survives the
        ``days`` filter.
    :raises KeyError: propagated from ``emoji_messages``.
    """
    reacted = self.emoji_messages(emoji_name, days)
    # Select the column before aggregating: this always yields a Series, whereas
    # groupby(...).apply(...) on an empty frame can come back as a DataFrame,
    # on which sort_values() without `by` fails.
    return reacted.groupby('user id')['count'].sum().sort_values(ascending=False)
async def emoji_leaderboard(self, client: "discord.Client", emoji_name: str, days: int) -> str:
    """Format a per-user leaderboard of ``emoji_name`` reaction totals.

    :param client: Discord client used to resolve each user id to a display name.
    :param emoji_name: emoji whose totals are listed.
    :param days: look-back window in days; also echoed in the heading.
    :return: a heading line followed by one back-ticked row per user, with the
        names padded to a common width.
    :raises KeyError: if there are no totals to show (unknown emoji, or nothing
        left inside the ``days`` window).
    """
    counts: pd.Series = self.emoji_totals(emoji_name, days)
    if counts.empty:
        # max() below would otherwise fail with a ValueError on an empty list.
        raise KeyError(f'No messages found with {emoji_name} reactions')

    # user_id is passed positionally: newer discord.py releases declare it
    # positional-only, and older ones accept it positionally as well.
    names = [str((await client.fetch_user(uid)).display_name) for uid in counts.index]
    width = max(map(len, names))

    heading = f'{emoji_name} totals, past {days} days\n'
    # Pair names with totals directly instead of Series.iteritems(), which was
    # removed in pandas 2.0.
    rows = '\n'.join(
        f"`{name.ljust(width + 1)}with {cnt:<2.0f} total`"
        for name, cnt in zip(names, counts)
    )
    return heading + rows
async def cancellation_leaderboard(self, days, client: "discord.Client" = None):
    """Leaderboard for the ``'cancelled'`` emoji over the past ``days`` days.

    ``emoji_leaderboard`` is a coroutine that requires a Discord client, so this
    wrapper must receive one and be awaited. ``client`` is placed after ``days``
    with a default only to keep the existing positional call shape valid.

    :param days: look-back window in days.
    :param client: Discord client used to resolve user names (required).
    :return: the formatted leaderboard text from ``emoji_leaderboard``.
    :raises TypeError: if ``client`` is not supplied.
    """
    if client is None:
        raise TypeError('cancellation_leaderboard() requires a discord client')
    return await self.emoji_leaderboard(client=client, emoji_name='cancelled', days=days)
def worst_offsenses(self, user: str, days: int):
cdf = self.emoji_messages('cancelled', days=days)
cdf = cdf[cdf['display_name'].str.contains(user, case=False)]
@@ -133,10 +167,7 @@ class MsgData:
return res
async def biggest_single(self, client: discord.Client, emoji: str, days: int) -> str:
    """Describe the user with the highest ``emoji`` total over the past ``days`` days.

    :param client: Discord client used to fetch the top user.
    :param emoji: emoji whose totals are ranked.
    :param days: look-back window in days.
    :return: text with the user's mention, their total and the window length.
    :raises KeyError: if there are no totals (unknown emoji, or nothing left
        inside the ``days`` window).
    """
    data: pd.Series = self.emoji_totals(emoji_name=emoji, days=days)
    if data.empty:
        # Indexing position 0 of an empty result would raise IndexError; report
        # "nothing found" with the same exception type as the rest of the class.
        raise KeyError(f'No messages found with {emoji} reactions')
    # emoji_totals sorts descending, so position 0 is the top user. The id is
    # passed positionally (positional-only in newer discord.py) as a plain int.
    user: discord.User = await client.fetch_user(int(data.index[0]))
    LOGGER.info(f'User: {user.mention}')
    return f'{user.mention} with {data.iloc[0]:.0f} over the past {int(days)} days'

35
msg.py
View File

@@ -5,7 +5,6 @@ from typing import Dict, Iterable
import discord
import pandas as pd
import pandas.errors
from dotenv import load_dotenv
LOGGER = logging.getLogger(__name__)
@@ -49,7 +48,7 @@ def message_dict(m: discord.Message) -> Dict:
'user id': m.author.id,
'message': m.content,
'channel': m.channel.name,
'channel link': f'<#{m.channel.id}>',
'channel link': m.channel.mention,
'link': m.jump_url,
}
@@ -78,38 +77,6 @@ async def reaction_dict(r: discord.Reaction) -> Dict:
}
# Select the rows of msg_df for messages that carry an `emoji_name` reaction in
# react_df, add a 'count' column taken from react_df, keep only rows created
# within the past `days` days and sort them by 'count', highest first.
# When `emoji_name` is not present in react_df's second index level, an error is
# logged and nothing is returned (implicit None) despite the DataFrame annotation.
# NOTE(review): indentation is not visible in this view, so how the try block
# nests relative to the preceding if/else cannot be confirmed from here.
def emoji_messages(msg_df, react_df, emoji_name: str, days: int = 10) -> pd.DataFrame:
# distinct emoji names available on the second index level of react_df
cached_emojis = react_df.index.get_level_values(1).drop_duplicates().values
if emoji_name in cached_emojis:
reactions = react_df.loc[pd.IndexSlice[:, emoji_name], :]
reacted_msgs = msg_df.loc[reactions.index.get_level_values(0).to_list()]
# drop repeated message ids so the per-id count lookup below sees each id once
reacted_msgs = reacted_msgs[~reacted_msgs.index.duplicated()].sort_index()
if reacted_msgs.shape[0] == 0:
LOGGER.error(f'No messages found with {emoji_name} reactions')
else:
LOGGER.info(
f'Found {reacted_msgs.shape[0]} messages for the leaderboard, ' + \
f'{reactions["count"].sum():.0f} reactions total'
)
try:
# one .loc lookup into `reactions` per message id
reacted_msgs['count'] = reacted_msgs.index.to_series().apply(
lambda idx: reactions.loc[pd.IndexSlice[idx, emoji_name], 'count'])
except pandas.errors.InvalidIndexError as e:
# log the duplicated rows alongside the error, then re-raise it
LOGGER.error(f'{e}\n{reacted_msgs[reacted_msgs.index.duplicated()]}')
raise
else:
# keep only messages created inside the look-back window
reacted_msgs = reacted_msgs[
reacted_msgs['created'] >= (datetime.today() - timedelta(days=days)).astimezone()]
reacted_msgs = reacted_msgs.sort_values('count', ascending=False)
return reacted_msgs
else:
LOGGER.error(f'Emoji not found in reactions DataFrame: {emoji_name}')
def emoji_totals(edf: pd.DataFrame) -> pd.DataFrame:
totals = edf.groupby('display_name').sum()['count'].sort_values(ascending=False).apply(int)
max_channels = (

View File

@@ -3,7 +3,6 @@ import logging
import os
import re
from pathlib import Path
from threading import Lock
import discord
from dotenv import load_dotenv
@@ -26,7 +25,6 @@ class RoboPage(discord.Client):
attrs = filter(lambda n: n.endswith('Joke') and not n.startswith('Joke'), dir(jokes))
attrs = map(lambda n: getattr(jokes, n)(), attrs)
self.jokes = list(attrs)
self.lock = Lock()
self.most_regex = re.compile("^who is the most (?P<emoji>\w+)(?: in the past (?P<days>\d+) days)?\??$",
re.IGNORECASE)
self.leaderboard_regex = re.compile('^most (?P<emoji>\w+) leaderboard$', re.IGNORECASE)
@@ -56,10 +54,15 @@ class RoboPage(discord.Client):
if hasattr(self, 'data'):
await self.data.add_msg(message)
async with self.data.lock:
if message.author != self.user:
if (m := self.leaderboard_regex.match(message.content)) is not None:
try:
await message.reply(self.data.emoji_leaderboard(emoji_name=m.group('emoji').lower(), days=14))
await message.reply(await self.data.emoji_leaderboard(
client=self,
emoji_name=m.group('emoji').lower(),
days=14
))
except KeyError as e:
LOGGER.exception(e)
await message.reply(f"I couldn't find any {m.group('emoji')} reactions. Leave me alone!")