big rework of emoji calculations, much simpler and more efficient now
This commit is contained in:
85
data.py
85
data.py
@@ -1,14 +1,14 @@
|
||||
import asyncio
|
||||
import logging
|
||||
import sqlite3
|
||||
from datetime import datetime, timedelta
|
||||
from pathlib import Path
|
||||
|
||||
import discord
|
||||
import pandas as pd
|
||||
from discord import RawReactionActionEvent
|
||||
|
||||
from msg import message_df, reaction_df, message_dict, LOGGER, convert_emoji, reaction_series, emoji_messages, \
|
||||
emoji_totals
|
||||
from msg import message_df, reaction_df, message_dict, LOGGER, convert_emoji, reaction_series
|
||||
|
||||
LOGGER = logging.getLogger(__name__)
|
||||
|
||||
@@ -21,14 +21,15 @@ class MsgData:
|
||||
@classmethod
async def create(cls, client: 'discord.Client', **kwargs):
    """Asynchronously build an instance from live Discord data.

    Args:
        client: Discord client handed to ``message_df`` to load messages.
        **kwargs: Forwarded unchanged to ``message_df``.

    Returns:
        A new instance with ``lock``, ``msgs`` (sorted by ``'created'``)
        and ``reactions`` populated.
    """
    # Instantiate through cls so subclasses get an instance of their own type.
    self = cls()
    # The lock is created before any data is loaded and held while the
    # frames are populated.
    self.lock = asyncio.Lock()
    async with self.lock:
        msgs: pd.DataFrame = await message_df(client, **kwargs)
        self.msgs = msgs.sort_values('created')
        # The 'object' column is passed to reaction_df to build the
        # reactions frame.
        self.reactions: pd.DataFrame = await reaction_df(self.msgs['object'].tolist())
    return self
|
||||
|
||||
@classmethod
|
||||
def from_sql(cls, db):
|
||||
def from_sql(cls, db, local_tz='US/Central'):
|
||||
if isinstance(db, (str, Path)):
|
||||
con = sqlite3.connect(db)
|
||||
elif isinstance(db, sqlite3.Connection):
|
||||
@@ -36,7 +37,7 @@ class MsgData:
|
||||
|
||||
self = MsgData()
|
||||
self.msgs: pd.DataFrame = pd.read_sql('select * from msgs', con=con, index_col='id')
|
||||
self.msgs['created'] = self.msgs['created'].apply(pd.to_datetime, utc=True)
|
||||
self.msgs['created'] = pd.to_datetime(self.msgs['created']).dt.tz_convert(local_tz)
|
||||
self.reactions: pd.DataFrame = pd.read_sql('select * from reactions', con).set_index(['msg id', 'emoji'])
|
||||
return self
|
||||
|
||||
@@ -45,6 +46,8 @@ class MsgData:
|
||||
con = sqlite3.connect(db)
|
||||
elif isinstance(db, sqlite3.Connection):
|
||||
con = db
|
||||
else:
|
||||
raise TypeError(f'db argument is not a valid type: {type(db)}')
|
||||
|
||||
self.msgs.drop('object', axis=1).to_sql(
|
||||
name='msgs',
|
||||
@@ -69,7 +72,7 @@ class MsgData:
|
||||
|
||||
def __getitem__(self, item):
|
||||
if isinstance(item, str):
|
||||
return self.reactions.loc[pd.IndexSlice[:, item],].fillna(0).applymap(int)
|
||||
return self.emoji_messages(emoji_name=item).sort_values('count', ascending=False)
|
||||
elif isinstance(item, int):
|
||||
return self.reactions.loc[pd.IndexSlice[item, :],].fillna(0).applymap(int)
|
||||
|
||||
@@ -96,29 +99,60 @@ class MsgData:
|
||||
self.reactions = pd.concat([self.reactions, new])
|
||||
LOGGER.info(f'\n{str(new)}')
|
||||
|
||||
def emoji_messages(self, emoji_name: str, days: int = None) -> pd.DataFrame:
    """Return the captured messages that have a reaction with ``emoji_name``.

    The result contains the matching rows of ``self.msgs`` plus a
    ``'count'`` column taken from ``self.emoji_counts(emoji_name)``, sorted
    by ``'created'`` with the newest message first.

    Args:
        emoji_name: Name of the emoji to look up.
        days: When given, keep only messages created within the last
            ``days`` days; ``None`` disables the time filter.

    Returns:
        A new DataFrame; ``self.msgs`` is left untouched.

    Raises:
        KeyError: If none of the reacted message ids is present in
            ``self.msgs`` (``emoji_counts`` also raises it for an emoji
            that is not in ``self.reactions``).
    """
    counts = self.emoji_counts(emoji_name)

    # ids of the messages that have the targeted emoji
    count_ids = counts.index.drop_duplicates()

    # keep only the messages that were actually captured in self.msgs
    count_ids = count_ids[count_ids.isin(self.msgs.index.get_level_values(0))]

    if count_ids.empty:
        raise KeyError(f'No messages found with {emoji_name} reactions')

    # Copy explicitly: the column assignment below must never write through
    # to self.msgs or trigger pandas' SettingWithCopyWarning.
    res = self.msgs.loc[count_ids].copy()
    res['count'] = counts  # aligned on the message id index

    if days is not None:
        cutoff = (datetime.today() - timedelta(days=days)).astimezone()
        res = res[res['created'] >= cutoff]

    return res.sort_values('created', ascending=False)
|
||||
|
||||
def emoji_totals(self, emoji_name: str, days: int):
|
||||
return emoji_totals(edf=self.emoji_messages(emoji_name, days))
|
||||
def emoji_counts(self, emoji_name: str) -> pd.Series:
    """Return the ``'count'`` values of ``self.reactions`` for one emoji.

    ``self.reactions`` is indexed on two levels; rows whose second level
    equals ``emoji_name`` are selected, that level is dropped so the result
    is indexed by the first level only, and the values are sorted in
    descending order.

    Args:
        emoji_name: Emoji label to select on the second index level.

    Raises:
        TypeError: If ``emoji_name`` is not a string.
        KeyError: If ``emoji_name`` does not occur in ``self.reactions``
            (logged, then re-raised).
    """
    # Explicit check instead of assert: asserts are stripped under `python -O`.
    if not isinstance(emoji_name, str):
        raise TypeError('emoji_name must be a string')
    try:
        selected = self.reactions.loc[pd.IndexSlice[:, emoji_name], 'count']
    except KeyError:
        LOGGER.error(
            f' {emoji_name} not found out of {self.unique_emojis.shape[0]} unique emojis')
        raise
    return selected.droplevel(1).sort_values(ascending=False)
|
||||
|
||||
def emoji_leaderboard(self, emoji_name: str, days: int):
|
||||
df = self.emoji_totals(emoji_name, days)
|
||||
width = max(list(map(lambda s: len(str(s)), df.index.values)))
|
||||
@property
def unique_emojis(self) -> pd.Index:
    """Distinct values of the second index level of ``self.reactions``."""
    emoji_level = self.reactions.index.get_level_values(1)
    return emoji_level.drop_duplicates()
|
||||
|
||||
def emoji_totals(self, emoji_name: str, days: int = None) -> pd.Series:
    """Sum the ``'count'`` column of ``emoji_messages`` per ``'user id'``.

    Args:
        emoji_name: Emoji to total; forwarded to ``self.emoji_messages``.
        days: Optional look-back window, forwarded to ``self.emoji_messages``.

    Returns:
        A Series indexed by ``'user id'`` holding each user's summed count,
        largest total first. It is an empty Series when ``emoji_messages``
        returns no rows.
    """
    messages = self.emoji_messages(emoji_name, days)
    # Selecting the column before aggregating always yields a Series, even
    # for an empty frame, and avoids a Python-level lambda per group.
    totals = messages.groupby('user id')['count'].sum()
    return totals.sort_values(ascending=False)
|
||||
|
||||
async def emoji_leaderboard(self, client: 'discord.Client', emoji_name: str, days: int):
    """Format a per-user leaderboard for one emoji as a reply string.

    Args:
        client: Discord client used to resolve each user id to a display name.
        emoji_name: Emoji whose totals are listed.
        days: Look-back window passed to ``self.emoji_totals``.

    Returns:
        A header line followed by one back-quoted line per user, in the
        order produced by ``self.emoji_totals``.

    Raises:
        KeyError: If there are no totals to show, either from
            ``emoji_totals`` or because the window is empty.
    """
    counts: pd.Series = self.emoji_totals(emoji_name, days)
    if counts.empty:
        # max() below would raise ValueError on an empty sequence; KeyError
        # matches the "nothing found" error the lookup methods already raise.
        raise KeyError(f'No messages found with {emoji_name} reactions')

    # Pass the id positionally as a plain int.
    names = [(await client.fetch_user(int(uid))).display_name for uid in counts.index]
    width = max(len(str(name)) for name in names)

    header = f'{emoji_name} totals, past {days} days\n'
    rows = '\n'.join(
        f"`{str(name).ljust(width + 1)}with {cnt:<2.0f} total`"
        for name, cnt in zip(names, counts)
    )
    return header + rows
|
||||
|
||||
def cancellation_leaderboard(self, days, client=None):
    """Shortcut for ``emoji_leaderboard`` with the ``'cancelled'`` emoji.

    Args:
        days: Look-back window forwarded to ``self.emoji_leaderboard``.
        client: Discord client forwarded to ``self.emoji_leaderboard``,
            which requires it. It defaults to ``None`` only to keep the
            old call shape importable.

    Returns:
        Whatever ``self.emoji_leaderboard`` returns. That method is a
        coroutine function, so the result must be awaited.

    Raises:
        TypeError: If ``client`` is not supplied.
    """
    if client is None:
        # emoji_leaderboard cannot run without a client; fail with a clear
        # message instead of a missing-argument error from the inner call.
        raise TypeError("cancellation_leaderboard() requires a 'client' argument")
    return self.emoji_leaderboard(client=client, emoji_name='cancelled', days=days)
|
||||
|
||||
def worst_offsenses(self, user: str, days: int):
|
||||
cdf = self.emoji_messages('cancelled', days=days)
|
||||
cdf = cdf[cdf['display_name'].str.contains(user, case=False)]
|
||||
@@ -133,10 +167,7 @@ class MsgData:
|
||||
return res
|
||||
|
||||
async def biggest_single(self, client: 'discord.Client', emoji: str, days: int) -> str:
    """Describe the user with the highest total for ``emoji``.

    Args:
        client: Discord client used to fetch the top user.
        emoji: Emoji name passed to ``self.emoji_totals``.
        days: Look-back window passed to ``self.emoji_totals`` and echoed
            in the returned text.

    Returns:
        A sentence containing the user's mention, their total and the window.

    Raises:
        KeyError: If there are no totals for ``emoji`` in the window.
    """
    data: pd.Series = self.emoji_totals(emoji_name=emoji, days=days)
    if data.empty:
        # Indexing position 0 below would raise IndexError; KeyError matches
        # the "nothing found" error raised by the lookup methods.
        raise KeyError(f'No messages found with {emoji} reactions')

    # emoji_totals sorts descending, so position 0 is the top user.
    # The id is passed positionally as a plain int.
    user = await client.fetch_user(int(data.index[0]))
    LOGGER.info(f'User: {user.mention}')
    return f'{user.mention} with {data.iloc[0]:.0f} over the past {int(days)} days'
|
||||
|
||||
35
msg.py
35
msg.py
@@ -5,7 +5,6 @@ from typing import Dict, Iterable
|
||||
|
||||
import discord
|
||||
import pandas as pd
|
||||
import pandas.errors
|
||||
from dotenv import load_dotenv
|
||||
|
||||
LOGGER = logging.getLogger(__name__)
|
||||
@@ -49,7 +48,7 @@ def message_dict(m: discord.Message) -> Dict:
|
||||
'user id': m.author.id,
|
||||
'message': m.content,
|
||||
'channel': m.channel.name,
|
||||
'channel link': f'<#{m.channel.id}>',
|
||||
'channel link': m.channel.mention,
|
||||
'link': m.jump_url,
|
||||
}
|
||||
|
||||
@@ -78,38 +77,6 @@ async def reaction_dict(r: discord.Reaction) -> Dict:
|
||||
}
|
||||
|
||||
|
||||
def emoji_messages(msg_df, react_df, emoji_name: str, days: int = 10) -> pd.DataFrame:
|
||||
cached_emojis = react_df.index.get_level_values(1).drop_duplicates().values
|
||||
|
||||
if emoji_name in cached_emojis:
|
||||
reactions = react_df.loc[pd.IndexSlice[:, emoji_name], :]
|
||||
reacted_msgs = msg_df.loc[reactions.index.get_level_values(0).to_list()]
|
||||
reacted_msgs = reacted_msgs[~reacted_msgs.index.duplicated()].sort_index()
|
||||
if reacted_msgs.shape[0] == 0:
|
||||
LOGGER.error(f'No messages found with {emoji_name} reactions')
|
||||
else:
|
||||
LOGGER.info(
|
||||
f'Found {reacted_msgs.shape[0]} messages for the leaderboard, ' + \
|
||||
f'{reactions["count"].sum():.0f} reactions total'
|
||||
)
|
||||
|
||||
try:
|
||||
reacted_msgs['count'] = reacted_msgs.index.to_series().apply(
|
||||
lambda idx: reactions.loc[pd.IndexSlice[idx, emoji_name], 'count'])
|
||||
except pandas.errors.InvalidIndexError as e:
|
||||
LOGGER.error(f'{e}\n{reacted_msgs[reacted_msgs.index.duplicated()]}')
|
||||
raise
|
||||
else:
|
||||
reacted_msgs = reacted_msgs[
|
||||
reacted_msgs['created'] >= (datetime.today() - timedelta(days=days)).astimezone()]
|
||||
|
||||
reacted_msgs = reacted_msgs.sort_values('count', ascending=False)
|
||||
|
||||
return reacted_msgs
|
||||
else:
|
||||
LOGGER.error(f'Emoji not found in reactions DataFrame: {emoji_name}')
|
||||
|
||||
|
||||
def emoji_totals(edf: pd.DataFrame) -> pd.DataFrame:
|
||||
totals = edf.groupby('display_name').sum()['count'].sort_values(ascending=False).apply(int)
|
||||
max_channels = (
|
||||
|
||||
@@ -3,7 +3,6 @@ import logging
|
||||
import os
|
||||
import re
|
||||
from pathlib import Path
|
||||
from threading import Lock
|
||||
|
||||
import discord
|
||||
from dotenv import load_dotenv
|
||||
@@ -26,7 +25,6 @@ class RoboPage(discord.Client):
|
||||
attrs = filter(lambda n: n.endswith('Joke') and not n.startswith('Joke'), dir(jokes))
|
||||
attrs = map(lambda n: getattr(jokes, n)(), attrs)
|
||||
self.jokes = list(attrs)
|
||||
self.lock = Lock()
|
||||
self.most_regex = re.compile("^who is the most (?P<emoji>\w+)(?: in the past (?P<days>\d+) days)?\??$",
|
||||
re.IGNORECASE)
|
||||
self.leaderboard_regex = re.compile('^most (?P<emoji>\w+) leaderboard$', re.IGNORECASE)
|
||||
@@ -56,10 +54,15 @@ class RoboPage(discord.Client):
|
||||
if hasattr(self, 'data'):
|
||||
await self.data.add_msg(message)
|
||||
|
||||
async with self.data.lock:
|
||||
if message.author != self.user:
|
||||
if (m := self.leaderboard_regex.match(message.content)) is not None:
|
||||
try:
|
||||
await message.reply(self.data.emoji_leaderboard(emoji_name=m.group('emoji').lower(), days=14))
|
||||
await message.reply(await self.data.emoji_leaderboard(
|
||||
client=self,
|
||||
emoji_name=m.group('emoji').lower(),
|
||||
days=14
|
||||
))
|
||||
except KeyError as e:
|
||||
LOGGER.exception(e)
|
||||
await message.reply(f"I couldn't find any {m.group('emoji')} reactions. Leave me alone!")
|
||||
|
||||
Reference in New Issue
Block a user