better reaction storage

This commit is contained in:
jsl12
2022-01-22 01:45:49 -06:00
parent 64e7dc6bb0
commit a956ee7afe
4 changed files with 173 additions and 172 deletions

View File

@@ -3,64 +3,68 @@ import logging
import sqlite3
from datetime import datetime, timedelta
from pathlib import Path
from typing import Union
import discord
import pandas as pd
from nextcord import Client, Message
from .msg import message_df, full_reaction_df, message_dict, LOGGER, reaction_df
from .msg import LOGGER, reaction_df
LOGGER = logging.getLogger(__name__)
class MsgData:
db_path: Path
msgs: pd.DataFrame
reactions: pd.DataFrame
lock: asyncio.Lock
@classmethod
async def create(cls, client: discord.Client, **kwargs):
self = MsgData()
def __init__(self, path: Union[str, Path]):
self.lock = asyncio.Lock()
self.msgs: pd.DataFrame = await message_df(client, **kwargs)
self.msgs = self.msgs.sort_values('created')
self.reactions: pd.DataFrame = full_reaction_df(self.msgs['object'].tolist())
return self
self.db_path: Path = Path(path) if isinstance(path, str) else path
@classmethod
def from_sql(cls, db, local_tz='US/Central'):
if isinstance(db, (str, Path)):
con = sqlite3.connect(db)
elif isinstance(db, sqlite3.Connection):
con = db
@property
def sql_context(self):
    """Open and return a new SQLite connection to ``self.db_path``.

    A fresh connection is created on every access; callers in this class use
    it as ``with self.sql_context as con: ...``.

    NOTE(review): per the sqlite3 docs, using a connection as a context
    manager only commits or rolls back the transaction -- it does not close
    the connection. Callers are responsible for calling ``con.close()``.
    """
    connection = sqlite3.connect(self.db_path)
    return connection
self = MsgData()
self.msgs: pd.DataFrame = pd.read_sql('select * from msgs', con=con, index_col='id')
self.msgs['created'] = pd.to_datetime(self.msgs['created']).dt.tz_convert(local_tz)
self.reactions: pd.DataFrame = pd.read_sql('select * from reactions', con).set_index(['msg id', 'emoji'])
return self
def to_sql(self, db):
if isinstance(db, (str, Path)):
con = sqlite3.connect(db)
elif isinstance(db, sqlite3.Connection):
con = db
async def load_sql(self, local_tz='US/Central'):
async with self.lock:
with self.sql_context as con:
LOGGER.info(f'Opened {self.db_path.name}')
try:
self.reactions = pd.read_sql('select * from reactions', con=con).reset_index()
self.reactions['datetime'] = pd.to_datetime(self.reactions['datetime']).dt.tz_convert(local_tz)
except:
LOGGER.warning(f'failed to read reactions from: {self.db_path.resolve()}')
else:
raise TypeError(f'db argument is not a valid type: {type(db)}')
LOGGER.info(f'read {self.reactions.shape[0]:,} reactions')
con.close()
self.msgs.drop('object', axis=1).to_sql(
name='msgs',
con=con,
if_exists='replace',
index=True,
index_label=self.msgs.index.name
)
self.reactions.drop('object', axis=1).to_sql(
async def scan_messages(self, client: Client, **kwargs):
self.reactions = await reaction_df(client, **kwargs)
async with self.lock:
with self.sql_context as con:
self.reactions.to_sql(
name='reactions',
con=con,
if_exists='replace',
index=True,
index=False,
index_label=self.reactions.index.name
)
LOGGER.info(f'wrote {self.reactions.shape[0]:,} into {self.db_path.name}')
def most(self, emoji: str):
    """Return the stored reaction rows for ``emoji``, highest count first.

    Args:
        emoji: Value compared for equality against the ``'emoji'`` column of
            ``self.reactions``.

    Returns:
        A new DataFrame holding only the matching rows, ordered by ``'count'``
        descending and re-indexed from 0, or ``None`` (after logging) when no
        row matches.
    """
    matching = self.reactions['emoji'] == emoji
    if not matching.any():
        LOGGER.info(f'No reactions with {emoji}')
        return None

    # A stable sort keeps rows with equal counts in their stored order, so the
    # "top" row picked by callers is deterministic when counts tie.
    return (
        self.reactions.loc[matching]
        .sort_values('count', ascending=False, kind='mergesort')
        .reset_index(drop=True)
    )
async def fetch_message(self, client: Client, row: pd.Series):
    """Fetch the message that a stored reaction row refers to.

    Args:
        client: Client used to start the lookup via ``fetch_guild``.
        row: Record providing the ``'guild_id'``, ``'channel_id'`` and
            ``'msg_id'`` entries.

    Returns:
        Whatever ``channel.fetch_message`` returns for ``row['msg_id']``.
    """
    # Walk guild -> channel -> message; each step needs the previous result.
    server = await client.fetch_guild(row['guild_id'])
    text_channel = await server.fetch_channel(row['channel_id'])
    message = await text_channel.fetch_message(row['msg_id'])
    return message
def __str__(self):
    """Render the loaded frames as text, separated by a blank line.

    ``msgs`` comes first, then ``reactions``. A frame that has not been
    assigned on the instance yet is skipped instead of raising
    ``AttributeError``; with both present the output is
    ``str(self.msgs) + '\\n\\n' + str(self.reactions)``.
    """
    frames = (getattr(self, name, None) for name in ('msgs', 'reactions'))
    return '\n\n'.join(str(frame) for frame in frames if frame is not None)
@@ -74,14 +78,14 @@ class MsgData:
elif isinstance(item, int):
return self.reactions.loc[pd.IndexSlice[item, :],].fillna(0).applymap(int)
async def add_msg(self, message: discord.Message):
async with self.lock:
mdict = message_dict(message)
mdict.pop('id')
self.msgs.loc[message.id] = pd.Series(mdict)
LOGGER.info(f'Added message id {message.id} from {message.author}: {message.content}')
# async def add_msg(self, message: Message):
# async with self.lock:
# mdict = message_dict(message)
# mdict.pop('id')
# self.msgs.loc[message.id] = pd.Series(mdict)
# LOGGER.info(f'Added message id {message.id} from {message.author}: {message.content}')
async def update_reaction(self, msg: discord.Message):
async def update_reaction(self, msg: Message):
# Drop all the reactions for this message id, if there are any
try:
async with self.lock:
@@ -96,8 +100,8 @@ class MsgData:
self.reactions = self.reactions.append(new)
LOGGER.info(str(new.droplevel(level=0, axis=0).loc[:, 'count']))
if msg.id not in self.msgs.index:
await self.add_msg(msg)
# if msg.id not in self.msgs.index:
# await self.add_msg(msg)
return new
@@ -157,7 +161,7 @@ class MsgData:
f' {type(emoji_name)}:{emoji_name}, {type(days)}:{days}')
# return pd.DataFrame()
async def emoji_user_counts(self, client: discord.Client, emoji_name: str, days: int = None):
async def emoji_user_counts(self, client: Client, emoji_name: str, days: int = None):
"""Creates a Series indexed by user display_name with the number of reactions with emoji_name as values"""
counts: pd.Series = self.emoji_totals(emoji_name, days)
counts.index = pd.Index([(await client.fetch_user(user_id=uid)).display_name for uid in counts.index])

View File

@@ -1,18 +1,20 @@
import logging
import re
from datetime import timedelta, datetime
from pathlib import Path
import nextcord as discord
import pandas as pd
from nextcord import Client, Message, TextChannel
from . import jokes
from .data import MsgData
LIL_STINKY_ID = 704043422276780072
LOGGER = logging.getLogger(__name__)
class Kwaylon(discord.Client):
class Kwaylon(Client):
db_path: Path = Path('../messages.db')
def __init__(self, limit: int = 5000, days: int = 30, *args, **kwargs):
@@ -20,10 +22,7 @@ class Kwaylon(discord.Client):
self.limit, self.days = limit, days
self.jokes = list(jokes.collect_jokes())
self.most_regex = re.compile(
'^who is the most\s+(?P<emoji>\S+)\s*?(?:in the past (?P<days>\d+) days)?\??$',
re.IGNORECASE,
)
self.most_regex = re.compile('most\s+(?P<emoji>\S+)')
self.leaderboard_regex = re.compile(
'^most\s*?(?P<emoji>\S+?)\s*?(leaderboard|((?:.+?(?P<days>\d+) days)))',
re.IGNORECASE
@@ -31,97 +30,118 @@ class Kwaylon(discord.Client):
async def handle_ready(self):
async def alive():
channel: discord.TextChannel = discord.utils.get(self.get_all_channels(), name='robotics-facility')
channel: TextChannel = discord.utils.get(self.get_all_channels(), name='robotics-facility')
await channel.send('https://tenor.com/view/terminator-im-back-gif-19144173')
await channel.send(f"{discord.utils.get(self.emojis, name='kaylon')}")
# await alive()
# self.data: data.MsgData = await data.MsgData.create(client=self, limit=self.limit, days=self.days)
# self.data.to_sql(self.db_path)
# LOGGER.info(f'{self.data.msgs.shape[0]} messages total')
self.data = MsgData(path=Path('./messages.db'))
await self.data.load_sql()
if not hasattr(self.data, 'reactions'):
await self.data.scan_messages(client=self, limit=self.limit, days=self.days)
async def handle_message(self, message):
async def handle_message(self, message: Message):
if message.author != self.user:
if hasattr(self, 'data'):
await self.data.add_msg(message)
if (m := self.leaderboard_regex.match(message.content)) is not None:
try:
await message.reply(await self.leaderboard(match=m))
except KeyError as e:
LOGGER.exception(e)
await message.reply(f"I couldn't find any {m.group('emoji')} reactions. Leave me alone!")
return
elif (m := self.most_regex.match(message.content)) is not None:
try:
await message.reply(await self.biggest_single(match=m))
except Exception as e:
LOGGER.exception(e)
await message.reply('NObody')
for mention in message.mentions:
if mention.id == self.user.id and 'read' in message.content:
if (m := re.search('(\d+) days', message.content)):
days = int(m.group(1))
else:
LOGGER.warning(f'No self.data attribute')
days = self.days
await self.data.scan_messages(client=self, limit=self.limit, days=days)
return
# if hasattr(self, 'data'):
# await self.data.add_msg(message)
#
# if (m := self.leaderboard_regex.match(message.content)) is not None:
# try:
# await message.reply(await self.leaderboard(match=m))
# except KeyError as e:
# LOGGER.exception(e)
# await message.reply(f"I couldn't find any {m.group('emoji')} reactions. Leave me alone!")
# return
if (m := self.most_regex.match(message.clean_content)) is not None:
await self.data.load_sql()
emoji = get_emoji_name(m.group('emoji'))
LOGGER.info(emoji)
if (most := self.data.most(emoji=emoji)) is not None:
LOGGER.info(f'\n{str(most)}')
if (day_match := re.search('(?P<days>\d+) days', message.content)):
start = (datetime.today() - timedelta(days=int(day_match.group('days')))).astimezone()
valid_dates = most['datetime'] > start
most = most.loc[valid_dates]
if most.shape[0] > 0:
most = most.iloc[0]
msg = await self.data.fetch_message(self, most)
await message.reply(f'{msg.jump_url}')
LOGGER.info(f'{msg.clean_content}')
LOGGER.info(f' - {msg.author}')
LOGGER.info(f'{most["count"]}x {emoji}')
for joke in self.jokes:
if (m := joke.scan(message)) is not None:
LOGGER.info(f'{joke.__class__.__name__} detected: {message.content}, {m.group()}')
await joke.respond(message, self, m)
async def handle_raw_reaction(self, payload: discord.RawReactionActionEvent):
LOGGER.info(payload)
guild = await self.fetch_guild(payload.guild_id)
channel = await guild.fetch_channel(payload.channel_id)
message = await channel.fetch_message(payload.message_id)
# async def handle_raw_reaction(self, payload: RawReactionActionEvent):
# LOGGER.info(payload)
# guild = await self.fetch_guild(payload.guild_id)
# channel = await guild.fetch_channel(payload.channel_id)
# message = await channel.fetch_message(payload.message_id)
#
# if payload.event_type == 'REACTION_REMOVE':
# LOGGER.info(f'{payload.emoji} removed from\n{message.author}: {message.content}')
# elif payload.event_type == 'REACTION_ADD':
# LOGGER.info(
# f'{payload.member.display_name} added {payload.emoji} to\n' + \
# f'{message.author.display_name}: {message.content}')
#
# if hasattr(self, 'data'):
# await self.data.update_reaction(msg=message)
if payload.event_type == 'REACTION_REMOVE':
LOGGER.info(f'{payload.emoji} removed from\n{message.author}: {message.content}')
elif payload.event_type == 'REACTION_ADD':
LOGGER.info(
f'{payload.member.display_name} added {payload.emoji} to\n' + \
f'{message.author.display_name}: {message.content}')
if hasattr(self, 'data'):
await self.data.update_reaction(msg=message)
async def leaderboard(self, match: re.Match) -> str:
emoji_name = get_emoji_name(match.group('emoji'))
days = match.group('days') or 14
days = int(days)
counts = await self.data.emoji_user_counts(client=self,
emoji_name=emoji_name,
days=days)
width = max([len(str(s)) for s in counts.index.values])
res = f'{match.group("emoji")} totals, past {days} days\n'
res += '\n'.join(f"`{str(name).ljust(width + 1)}with {cnt:<2.0f} total`"
for name, cnt in counts.iteritems())
return res
async def biggest_single(self, match: re.Match) -> str:
days = match.group('days') or 14
days = int(days)
data: pd.Series = self.data.emoji_totals(
emoji_name=get_emoji_name(match.group('emoji')),
days=days
)
user: discord.User = await self.fetch_user(user_id=data.index[0])
LOGGER.info(f'User: {user.mention}')
msg = f'{user.mention} with {data.iloc[0]:.0f}x {match.group("emoji")} over the past {days} days'
msg += '\n' + await self.worst_offsenses(user=user, days=days, top=3, emoji_str=match.group('emoji'))
return msg
async def worst_offsenses(self, user: discord.User, emoji_str: str, days: int = None, top: int = 3) -> str:
df: pd.DataFrame = self.data.emoji_messages(get_emoji_name(emoji_str), days=days)
df: pd.DataFrame = df[df['user id'] == user.id].sort_values('count', ascending=False).iloc[:top]
if df.shape[0] > 0:
res = f'Top {top} {emoji_str}\n'
res += f'\n'.join(
f'{emoji_str}x{row["count"]:.0f}\n{row["link"]}' for idx, row in df.iterrows())
else:
res = f'No {emoji_str} for {user} in the past {days} days'
return res
# async def leaderboard(self, match: re.Match) -> str:
# emoji_name = get_emoji_name(match.group('emoji'))
# days = match.group('days') or 14
# days = int(days)
# counts = await self.data.emoji_user_counts(client=self,
# emoji_name=emoji_name,
# days=days)
# width = max([len(str(s)) for s in counts.index.values])
# res = f'{match.group("emoji")} totals, past {days} days\n'
# res += '\n'.join(f"`{str(name).ljust(width + 1)}with {cnt:<2.0f} total`"
# for name, cnt in counts.iteritems())
# return res
#
# async def biggest_single(self, match: re.Match) -> str:
# days = match.group('days') or 14
# days = int(days)
# data: pd.Series = self.data.emoji_totals(
# emoji_name=get_emoji_name(match.group('emoji')),
# days=days
# )
# user: User = await self.fetch_user(user_id=data.index[0])
# LOGGER.info(f'User: {user.mention}')
# msg = f'{user.mention} with {data.iloc[0]:.0f}x {match.group("emoji")} over the past {days} days'
# msg += '\n' + await self.worst_offsenses(user=user, days=days, top=3, emoji_str=match.group('emoji'))
# return msg
#
# async def worst_offsenses(self, user: User, emoji_str: str, days: int = None, top: int = 3) -> str:
# df: pd.DataFrame = self.data.emoji_messages(get_emoji_name(emoji_str), days=days)
# df: pd.DataFrame = df[df['user id'] == user.id].sort_values('count', ascending=False).iloc[:top]
#
# if df.shape[0] > 0:
# res = f'Top {top} {emoji_str}\n'
# res += f'\n'.join(
# f'{emoji_str}x{row["count"]:.0f}\n{row["link"]}' for idx, row in df.iterrows())
# else:
# res = f'No {emoji_str} for {user} in the past {days} days'
#
# return res
def get_emoji_name(string: str) -> str:

View File

@@ -1,23 +1,15 @@
import logging
from datetime import datetime, timedelta
from typing import Dict, Iterable
import pandas as pd
from nextcord import Client, Message
from nextcord import Reaction
from nextcord import Client, Message, Reaction
from nextcord import TextChannel
from nextcord.utils import AsyncIterator
LOGGER = logging.getLogger(__name__)
async def message_df(client: Client, **kwargs):
return pd.DataFrame(
[message_dict(m) async for m in message_gen(client, **kwargs)]
).set_index('id').sort_values('created', ascending=False)
async def message_gen(client: Client, limit=20, days: int = 90, **kwargs) -> AsyncIterator:
async def message_gen(client: Client, limit=20, days: int = 90, **kwargs) -> AsyncIterator[Message]:
if 'after' not in kwargs:
kwargs['after'] = (datetime.today() - timedelta(days=days))
elif isinstance((after := kwargs.get('after', None)), datetime):
@@ -25,6 +17,7 @@ async def message_gen(client: Client, limit=20, days: int = 90, **kwargs) -> Asy
kwargs['limit'] = limit
LOGGER.info(kwargs)
for channel in client.get_all_channels():
if channel.category is not None and channel.category.name != 'Archive':
if isinstance(channel, TextChannel):
@@ -41,34 +34,20 @@ async def message_gen(client: Client, limit=20, days: int = 90, **kwargs) -> Asy
LOGGER.info(f'Done getting messages')
def message_dict(m: Message) -> Dict:
return {
'object': m,
'id': m.id,
'created': m.created_at.astimezone(),
'display_name': m.author.display_name,
'user id': m.author.id,
'message': m.content,
'channel': m.channel.name,
'channel link': m.channel.mention,
'link': m.jump_url,
async def reaction_gen(client: Client, **kwargs) -> AsyncIterator[dict]:
    """Yield one flat record (a dict) per reaction on each scanned message.

    Args:
        client: Passed through to ``message_gen``.
        **kwargs: Passed through unchanged to ``message_gen``.

    Yields:
        A dict with the keys ``msg_id``, ``emoji``, ``emoji_id``,
        ``channel_id``, ``guild_id``, ``auth_id``, ``count`` and ``datetime``,
        in that order. For custom emoji, ``emoji`` is the emoji's name and
        ``emoji_id`` its id; otherwise ``emoji`` is ``reaction.emoji`` itself
        and ``emoji_id`` is ``None``.
    """
    async for msg in message_gen(client=client, **kwargs):
        if not msg.reactions:
            continue

        # These values are the same for every reaction on the message, so
        # look them up once per message rather than once per reaction.
        channel_id = msg.channel.id
        guild_id = msg.channel.guild.id
        author_id = msg.author.id
        created = msg.created_at.astimezone()

        for reaction in msg.reactions:
            custom = reaction.is_custom_emoji()
            yield {
                'msg_id': reaction.message.id,
                'emoji': reaction.emoji.name if custom else reaction.emoji,
                'emoji_id': reaction.emoji.id if custom else None,
                'channel_id': channel_id,
                'guild_id': guild_id,
                'auth_id': author_id,
                'count': int(reaction.count),
                'datetime': created,
            }
def full_reaction_df(msgs: Iterable[Message]):
return pd.concat([reaction_df(msg) for msg in msgs])
def reaction_df(msg: Message):
df = pd.DataFrame([reaction_dict(r) for r in msg.reactions])
return df.set_index(['msg id', 'emoji']) if not df.empty else df
def reaction_dict(r: Reaction) -> Dict:
return {
'object': r,
'msg id': r.message.id,
'emoji': r.emoji.name if r.is_custom_emoji() else r.emoji,
'emoji id': r.emoji.id if r.is_custom_emoji() else None,
'count': int(r.count),
}
async def reaction_df(client: Client, **kwargs):
    """Collect every record produced by ``reaction_gen`` into a DataFrame.

    Args:
        client: Passed through to ``reaction_gen``.
        **kwargs: Passed through unchanged to ``reaction_gen``.

    Returns:
        A DataFrame with one row per yielded record. The columns are fixed up
        front so that a scan that finds no reactions still returns a frame
        with the expected (empty) columns instead of a column-less frame.
    """
    # Must list the keys yielded by reaction_gen, in the same order.
    columns = [
        'msg_id',
        'emoji',
        'emoji_id',
        'channel_id',
        'guild_id',
        'auth_id',
        'count',
        'datetime',
    ]
    records = [r async for r in reaction_gen(client=client, **kwargs)]
    return pd.DataFrame(records, columns=columns)

View File

@@ -10,7 +10,6 @@ if __name__ == '__main__':
logging.basicConfig(level=logging.INFO)
# https://discordpy.readthedocs.io/en/stable/quickstart.html
client = Kwaylon(
# limit=100,
# days=10
@@ -36,6 +35,5 @@ if __name__ == '__main__':
# async def on_raw_reaction_remove(payload: RawReactionActionEvent):
# await client.handle_raw_reaction(payload)
load_dotenv()
client.run(os.getenv('DISCORD_TOKEN'))