big rework of emoji calculations, much simpler and more efficient now

This commit is contained in:
2021-08-13 19:31:22 -05:00
parent 8ef4de693a
commit 108a51b9fe
3 changed files with 92 additions and 91 deletions

85
data.py
View File

@@ -1,14 +1,14 @@
import asyncio import asyncio
import logging import logging
import sqlite3 import sqlite3
from datetime import datetime, timedelta
from pathlib import Path from pathlib import Path
import discord import discord
import pandas as pd import pandas as pd
from discord import RawReactionActionEvent from discord import RawReactionActionEvent
from msg import message_df, reaction_df, message_dict, LOGGER, convert_emoji, reaction_series, emoji_messages, \ from msg import message_df, reaction_df, message_dict, LOGGER, convert_emoji, reaction_series
emoji_totals
LOGGER = logging.getLogger(__name__) LOGGER = logging.getLogger(__name__)
@@ -21,14 +21,15 @@ class MsgData:
@classmethod @classmethod
async def create(cls, client: discord.Client, **kwargs): async def create(cls, client: discord.Client, **kwargs):
self = MsgData() self = MsgData()
self.lock = asyncio.Lock()
async with self.lock:
self.msgs: pd.DataFrame = await message_df(client, **kwargs) self.msgs: pd.DataFrame = await message_df(client, **kwargs)
self.msgs = self.msgs.sort_values('created') self.msgs = self.msgs.sort_values('created')
self.reactions: pd.DataFrame = await reaction_df(self.msgs['object'].tolist()) self.reactions: pd.DataFrame = await reaction_df(self.msgs['object'].tolist())
self.lock = asyncio.Lock()
return self return self
@classmethod @classmethod
def from_sql(cls, db): def from_sql(cls, db, local_tz='US/Central'):
if isinstance(db, (str, Path)): if isinstance(db, (str, Path)):
con = sqlite3.connect(db) con = sqlite3.connect(db)
elif isinstance(db, sqlite3.Connection): elif isinstance(db, sqlite3.Connection):
@@ -36,7 +37,7 @@ class MsgData:
self = MsgData() self = MsgData()
self.msgs: pd.DataFrame = pd.read_sql('select * from msgs', con=con, index_col='id') self.msgs: pd.DataFrame = pd.read_sql('select * from msgs', con=con, index_col='id')
self.msgs['created'] = self.msgs['created'].apply(pd.to_datetime, utc=True) self.msgs['created'] = pd.to_datetime(self.msgs['created']).dt.tz_convert(local_tz)
self.reactions: pd.DataFrame = pd.read_sql('select * from reactions', con).set_index(['msg id', 'emoji']) self.reactions: pd.DataFrame = pd.read_sql('select * from reactions', con).set_index(['msg id', 'emoji'])
return self return self
@@ -45,6 +46,8 @@ class MsgData:
con = sqlite3.connect(db) con = sqlite3.connect(db)
elif isinstance(db, sqlite3.Connection): elif isinstance(db, sqlite3.Connection):
con = db con = db
else:
raise TypeError(f'db argument is not a valid type: {type(db)}')
self.msgs.drop('object', axis=1).to_sql( self.msgs.drop('object', axis=1).to_sql(
name='msgs', name='msgs',
@@ -69,7 +72,7 @@ class MsgData:
def __getitem__(self, item): def __getitem__(self, item):
if isinstance(item, str): if isinstance(item, str):
return self.reactions.loc[pd.IndexSlice[:, item],].fillna(0).applymap(int) return self.emoji_messages(emoji_name=item).sort_values('count', ascending=False)
elif isinstance(item, int): elif isinstance(item, int):
return self.reactions.loc[pd.IndexSlice[item, :],].fillna(0).applymap(int) return self.reactions.loc[pd.IndexSlice[item, :],].fillna(0).applymap(int)
@@ -96,29 +99,60 @@ class MsgData:
self.reactions = pd.concat([self.reactions, new]) self.reactions = pd.concat([self.reactions, new])
LOGGER.info(f'\n{str(new)}') LOGGER.info(f'\n{str(new)}')
def emoji_messages(self, emoji_name: str, days: int): def emoji_messages(self, emoji_name: str, days: int = None) -> pd.DataFrame:
res = emoji_messages(msg_df=self.msgs, react_df=self.reactions, emoji_name=emoji_name, days=days) """Creates a DataFrame of the messages that have reactions with a certain emoji. Includes a 'count' column"""
if res is None: counts = self.emoji_counts(emoji_name)
raise KeyError(f'No emojis found for {emoji_name}')
# get the ids of messages that have the targeted emoji
count_ids = counts.index.drop_duplicates()
# filter by the messages that have actually been captured in the self.msgs DataFrame
count_ids = count_ids[count_ids.isin(self.msgs.index.get_level_values(0))]
if count_ids.shape[0] > 0:
res = self.msgs.loc[count_ids]
res['count'] = counts
if days is not None:
res = res[res['created'] >= (datetime.today() - timedelta(days=days)).astimezone()]
return res.sort_values('created', ascending=False)
else: else:
return res raise KeyError(f'No messages found with {emoji_name} reactions')
def emoji_totals(self, emoji_name: str, days: int): def emoji_counts(self, emoji_name: str) -> pd.Series:
return emoji_totals(edf=self.emoji_messages(emoji_name, days)) assert isinstance(emoji_name, str), f'emoji_name must be a string'
try:
return self.reactions.loc[pd.IndexSlice[:, emoji_name],'count'].droplevel(1).sort_values(ascending=False)
except KeyError as e:
LOGGER.error(
f' {emoji_name} not found out of {self.unique_emojis.shape[0]} unique emojis')
raise
def emoji_leaderboard(self, emoji_name: str, days: int): @property
df = self.emoji_totals(emoji_name, days) def unique_emojis(self) -> pd.Index:
width = max(list(map(lambda s: len(str(s)), df.index.values))) return self.reactions.index.get_level_values(1).drop_duplicates()
def emoji_totals(self, emoji_name: str, days: int = None) -> pd.Series:
"""Creates a Series of the counts for each user id"""
return (self
.emoji_messages(emoji_name, days)
.groupby('user id')
.apply(lambda gdf: gdf['count'].sum())
.sort_values(ascending=False))
async def emoji_leaderboard(self, client: discord.Client, emoji_name: str, days: int):
counts: pd.Series = self.emoji_totals(emoji_name, days)
counts.index = pd.Index([(await client.fetch_user(user_id=uid)).display_name for uid in counts.index])
width = max(list(map(lambda s: len(str(s)), counts.index.values)))
res = f'{emoji_name} totals, past {days} days\n' res = f'{emoji_name} totals, past {days} days\n'
res += '\n'.join( res += '\n'.join(
f"`{str(name).ljust(width + 1)}with {row['total']:<2.0f} total`" f"`{str(name).ljust(width + 1)}with {cnt:<2.0f} total`"
for name, row in df.iterrows() for name, cnt in counts.iteritems()
) )
return res return res
def cancellation_leaderboard(self, days):
return self.emoji_leaderboard(emoji_name='cancelled', days=days)
def worst_offsenses(self, user: str, days: int): def worst_offsenses(self, user: str, days: int):
cdf = self.emoji_messages('cancelled', days=days) cdf = self.emoji_messages('cancelled', days=days)
cdf = cdf[cdf['display_name'].str.contains(user, case=False)] cdf = cdf[cdf['display_name'].str.contains(user, case=False)]
@@ -133,10 +167,7 @@ class MsgData:
return res return res
async def biggest_single(self, client: discord.Client, emoji: str, days: int) -> str: async def biggest_single(self, client: discord.Client, emoji: str, days: int) -> str:
data = self.emoji_totals(emoji_name=emoji, days=days) data: pd.Series = self.emoji_totals(emoji_name=emoji, days=days)
username = data.index[0] user: discord.User = await client.fetch_user(user_id=data.index[0])
reacted_msgs = self.emoji_messages(emoji_name=emoji, days=days)
d = reacted_msgs.set_index('display_name')['user id'].drop_duplicates().to_dict()
user: discord.User = await client.fetch_user(user_id=d[username])
LOGGER.info(f'User: {user.mention}') LOGGER.info(f'User: {user.mention}')
return f'{user.mention} with {data.iloc[0]["total"]:.0f} over the past {int(days)} days' return f'{user.mention} with {data.iloc[0]:.0f} over the past {int(days)} days'

35
msg.py
View File

@@ -5,7 +5,6 @@ from typing import Dict, Iterable
import discord import discord
import pandas as pd import pandas as pd
import pandas.errors
from dotenv import load_dotenv from dotenv import load_dotenv
LOGGER = logging.getLogger(__name__) LOGGER = logging.getLogger(__name__)
@@ -49,7 +48,7 @@ def message_dict(m: discord.Message) -> Dict:
'user id': m.author.id, 'user id': m.author.id,
'message': m.content, 'message': m.content,
'channel': m.channel.name, 'channel': m.channel.name,
'channel link': f'<#{m.channel.id}>', 'channel link': m.channel.mention,
'link': m.jump_url, 'link': m.jump_url,
} }
@@ -78,38 +77,6 @@ async def reaction_dict(r: discord.Reaction) -> Dict:
} }
def emoji_messages(msg_df, react_df, emoji_name: str, days: int = 10) -> pd.DataFrame:
cached_emojis = react_df.index.get_level_values(1).drop_duplicates().values
if emoji_name in cached_emojis:
reactions = react_df.loc[pd.IndexSlice[:, emoji_name], :]
reacted_msgs = msg_df.loc[reactions.index.get_level_values(0).to_list()]
reacted_msgs = reacted_msgs[~reacted_msgs.index.duplicated()].sort_index()
if reacted_msgs.shape[0] == 0:
LOGGER.error(f'No messages found with {emoji_name} reactions')
else:
LOGGER.info(
f'Found {reacted_msgs.shape[0]} messages for the leaderboard, ' + \
f'{reactions["count"].sum():.0f} reactions total'
)
try:
reacted_msgs['count'] = reacted_msgs.index.to_series().apply(
lambda idx: reactions.loc[pd.IndexSlice[idx, emoji_name], 'count'])
except pandas.errors.InvalidIndexError as e:
LOGGER.error(f'{e}\n{reacted_msgs[reacted_msgs.index.duplicated()]}')
raise
else:
reacted_msgs = reacted_msgs[
reacted_msgs['created'] >= (datetime.today() - timedelta(days=days)).astimezone()]
reacted_msgs = reacted_msgs.sort_values('count', ascending=False)
return reacted_msgs
else:
LOGGER.error(f'Emoji not found in reactions DataFrame: {emoji_name}')
def emoji_totals(edf: pd.DataFrame) -> pd.DataFrame: def emoji_totals(edf: pd.DataFrame) -> pd.DataFrame:
totals = edf.groupby('display_name').sum()['count'].sort_values(ascending=False).apply(int) totals = edf.groupby('display_name').sum()['count'].sort_values(ascending=False).apply(int)
max_channels = ( max_channels = (

View File

@@ -3,7 +3,6 @@ import logging
import os import os
import re import re
from pathlib import Path from pathlib import Path
from threading import Lock
import discord import discord
from dotenv import load_dotenv from dotenv import load_dotenv
@@ -26,7 +25,6 @@ class RoboPage(discord.Client):
attrs = filter(lambda n: n.endswith('Joke') and not n.startswith('Joke'), dir(jokes)) attrs = filter(lambda n: n.endswith('Joke') and not n.startswith('Joke'), dir(jokes))
attrs = map(lambda n: getattr(jokes, n)(), attrs) attrs = map(lambda n: getattr(jokes, n)(), attrs)
self.jokes = list(attrs) self.jokes = list(attrs)
self.lock = Lock()
self.most_regex = re.compile("^who is the most (?P<emoji>\w+)(?: in the past (?P<days>\d+) days)?\??$", self.most_regex = re.compile("^who is the most (?P<emoji>\w+)(?: in the past (?P<days>\d+) days)?\??$",
re.IGNORECASE) re.IGNORECASE)
self.leaderboard_regex = re.compile('^most (?P<emoji>\w+) leaderboard$', re.IGNORECASE) self.leaderboard_regex = re.compile('^most (?P<emoji>\w+) leaderboard$', re.IGNORECASE)
@@ -56,10 +54,15 @@ class RoboPage(discord.Client):
if hasattr(self, 'data'): if hasattr(self, 'data'):
await self.data.add_msg(message) await self.data.add_msg(message)
async with self.data.lock:
if message.author != self.user: if message.author != self.user:
if (m := self.leaderboard_regex.match(message.content)) is not None: if (m := self.leaderboard_regex.match(message.content)) is not None:
try: try:
await message.reply(self.data.emoji_leaderboard(emoji_name=m.group('emoji').lower(), days=14)) await message.reply(await self.data.emoji_leaderboard(
client=self,
emoji_name=m.group('emoji').lower(),
days=14
))
except KeyError as e: except KeyError as e:
LOGGER.exception(e) LOGGER.exception(e)
await message.reply(f"I couldn't find any {m.group('emoji')} reactions. Leave me alone!") await message.reply(f"I couldn't find any {m.group('emoji')} reactions. Leave me alone!")