From a956ee7afed12efda12c4ea974b2950ee7845ebe Mon Sep 17 00:00:00 2001 From: jsl12 <32917998+jsl12@users.noreply.github.com> Date: Sat, 22 Jan 2022 01:45:49 -0600 Subject: [PATCH] better reaction storage --- kwaylon/data.py | 104 +++++++++++++------------- kwaylon/kwaylon.py | 182 +++++++++++++++++++++++++-------------------- kwaylon/msg.py | 57 +++++--------- main.py | 2 - 4 files changed, 173 insertions(+), 172 deletions(-) diff --git a/kwaylon/data.py b/kwaylon/data.py index ffa6cde..c6c08a7 100644 --- a/kwaylon/data.py +++ b/kwaylon/data.py @@ -3,64 +3,68 @@ import logging import sqlite3 from datetime import datetime, timedelta from pathlib import Path +from typing import Union -import discord import pandas as pd +from nextcord import Client, Message -from .msg import message_df, full_reaction_df, message_dict, LOGGER, reaction_df +from .msg import LOGGER, reaction_df LOGGER = logging.getLogger(__name__) class MsgData: + db_path: Path msgs: pd.DataFrame reactions: pd.DataFrame lock: asyncio.Lock - @classmethod - async def create(cls, client: discord.Client, **kwargs): - self = MsgData() + def __init__(self, path: Union[str, Path]): self.lock = asyncio.Lock() - self.msgs: pd.DataFrame = await message_df(client, **kwargs) - self.msgs = self.msgs.sort_values('created') - self.reactions: pd.DataFrame = full_reaction_df(self.msgs['object'].tolist()) - return self + self.db_path: Path = Path(path) if isinstance(path, str) else path - @classmethod - def from_sql(cls, db, local_tz='US/Central'): - if isinstance(db, (str, Path)): - con = sqlite3.connect(db) - elif isinstance(db, sqlite3.Connection): - con = db + @property + def sql_context(self): + return sqlite3.connect(self.db_path) - self = MsgData() - self.msgs: pd.DataFrame = pd.read_sql('select * from msgs', con=con, index_col='id') - self.msgs['created'] = pd.to_datetime(self.msgs['created']).dt.tz_convert(local_tz) - self.reactions: pd.DataFrame = pd.read_sql('select * from reactions', con).set_index(['msg id', 'emoji']) - return self + async def load_sql(self, local_tz='US/Central'): + async with self.lock: + with self.sql_context as con: + LOGGER.info(f'Opened {self.db_path.name}') + try: + self.reactions = pd.read_sql('select * from reactions', con=con).reset_index() + self.reactions['datetime'] = pd.to_datetime(self.reactions['datetime']).dt.tz_convert(local_tz) + except: + LOGGER.warning(f'failed to read reactions from: {self.db_path.resolve()}') + else: + LOGGER.info(f'read {self.reactions.shape[0]:,} reactions') + con.close() - def to_sql(self, db): - if isinstance(db, (str, Path)): - con = sqlite3.connect(db) - elif isinstance(db, sqlite3.Connection): - con = db + async def scan_messages(self, client: Client, **kwargs): + self.reactions = await reaction_df(client, **kwargs) + async with self.lock: + with self.sql_context as con: + self.reactions.to_sql( + name='reactions', + con=con, + if_exists='replace', + index=False, + index_label=self.reactions.index.name + ) + LOGGER.info(f'wrote {self.reactions.shape[0]:,} into {self.db_path.name}') + + def most(self, emoji: str): + matching = self.reactions['emoji'] == emoji + if not matching.any(): + LOGGER.info(f'No reactions with {emoji}') + return else: - raise TypeError(f'db argument is not a valid type: {type(db)}') + return self.reactions.loc[matching].sort_values('count', ascending=False).reset_index(drop=True) - self.msgs.drop('object', axis=1).to_sql( - name='msgs', - con=con, - if_exists='replace', - index=True, - index_label=self.msgs.index.name - ) - self.reactions.drop('object', axis=1).to_sql( - name='reactions', - con=con, - if_exists='replace', - index=True, - index_label=self.reactions.index.name - ) + async def fetch_message(self, client: Client, row: pd.Series): + guild = await client.fetch_guild(row['guild_id']) + channel = await guild.fetch_channel(row['channel_id']) + return await channel.fetch_message(row['msg_id']) def __str__(self): return str(self.msgs) + '\n\n' + str(self.reactions) @@ -74,14 +78,14 @@ class MsgData: elif isinstance(item, int): return self.reactions.loc[pd.IndexSlice[item, :],].fillna(0).applymap(int) - async def add_msg(self, message: discord.Message): - async with self.lock: - mdict = message_dict(message) - mdict.pop('id') - self.msgs.loc[message.id] = pd.Series(mdict) - LOGGER.info(f'Added message id {message.id} from {message.author}: {message.content}') + # async def add_msg(self, message: Message): + # async with self.lock: + # mdict = message_dict(message) + # mdict.pop('id') + # self.msgs.loc[message.id] = pd.Series(mdict) + # LOGGER.info(f'Added message id {message.id} from {message.author}: {message.content}') - async def update_reaction(self, msg: discord.Message): + async def update_reaction(self, msg: Message): # Drop all the reactions for this message id, if there are any try: async with self.lock: @@ -96,8 +100,8 @@ class MsgData: self.reactions = self.reactions.append(new) LOGGER.info(str(new.droplevel(level=0, axis=0).loc[:, 'count'])) - if msg.id not in self.msgs.index: - await self.add_msg(msg) + # if msg.id not in self.msgs.index: + # await self.add_msg(msg) return new @@ -157,7 +161,7 @@ class MsgData: f' {type(emoji_name)}:{emoji_name}, {type(days)}:{days}') # return pd.DataFrame() - async def emoji_user_counts(self, client: discord.Client, emoji_name: str, days: int = None): + async def emoji_user_counts(self, client: Client, emoji_name: str, days: int = None): """Creates a Series indexed by user display_name with the number of reactions with emoji_name as values""" counts: pd.Series = self.emoji_totals(emoji_name, days) counts.index = pd.Index([(await client.fetch_user(user_id=uid)).display_name for uid in counts.index]) diff --git a/kwaylon/kwaylon.py b/kwaylon/kwaylon.py index 16878e3..38875dc 100644 --- a/kwaylon/kwaylon.py +++ b/kwaylon/kwaylon.py @@ -1,18 +1,20 @@ import logging import re +from datetime import timedelta, datetime from pathlib import Path import nextcord as discord -import pandas as pd +from nextcord import Client, Message, TextChannel from . import jokes +from .data import MsgData LIL_STINKY_ID = 704043422276780072 LOGGER = logging.getLogger(__name__) -class Kwaylon(discord.Client): +class Kwaylon(Client): db_path: Path = Path('../messages.db') def __init__(self, limit: int = 5000, days: int = 30, *args, **kwargs): @@ -20,10 +22,7 @@ class Kwaylon(discord.Client): self.limit, self.days = limit, days self.jokes = list(jokes.collect_jokes()) - self.most_regex = re.compile( - '^who is the most\s+(?P\S+)\s*?(?:in the past (?P\d+) days)?\??$', - re.IGNORECASE, - ) + self.most_regex = re.compile('most\s+(?P\S+)') self.leaderboard_regex = re.compile( '^most\s*?(?P\S+?)\s*?(leaderboard|((?:.+?(?P\d+) days)))', re.IGNORECASE @@ -31,97 +30,118 @@ class Kwaylon(discord.Client): async def handle_ready(self): async def alive(): - channel: discord.TextChannel = discord.utils.get(self.get_all_channels(), name='robotics-facility') + channel: TextChannel = discord.utils.get(self.get_all_channels(), name='robotics-facility') await channel.send('https://tenor.com/view/terminator-im-back-gif-19144173') await channel.send(f"{discord.utils.get(self.emojis, name='kaylon')}") # await alive() - # self.data: data.MsgData = await data.MsgData.create(client=self, limit=self.limit, days=self.days) - # self.data.to_sql(self.db_path) - # LOGGER.info(f'{self.data.msgs.shape[0]} messages total') + self.data = MsgData(path=Path('./messages.db')) + await self.data.load_sql() + if not hasattr(self.data, 'reactions'): + await self.data.scan_messages(client=self, limit=self.limit, days=self.days) - async def handle_message(self, message): + async def handle_message(self, message: Message): if message.author != self.user: - if hasattr(self, 'data'): - await self.data.add_msg(message) + for mention in message.mentions: + if mention.id == self.user.id and 'read' in message.content: + if (m := re.search('(\d+) days', message.content)): + days = int(m.group(1)) + else: + days = self.days - if (m := self.leaderboard_regex.match(message.content)) is not None: - try: - await message.reply(await self.leaderboard(match=m)) - except KeyError as e: - LOGGER.exception(e) - await message.reply(f"I couldn't find any {m.group('emoji')} reactions. Leave me alone!") - return + await self.data.scan_messages(client=self, limit=self.limit, days=days) + return + # if hasattr(self, 'data'): + # await self.data.add_msg(message) + # + # if (m := self.leaderboard_regex.match(message.content)) is not None: + # try: + # await message.reply(await self.leaderboard(match=m)) + # except KeyError as e: + # LOGGER.exception(e) + # await message.reply(f"I couldn't find any {m.group('emoji')} reactions. Leave me alone!") + # return - elif (m := self.most_regex.match(message.content)) is not None: - try: - await message.reply(await self.biggest_single(match=m)) - except Exception as e: - LOGGER.exception(e) - await message.reply('NObody') - else: - LOGGER.warning(f'No self.data attribute') + if (m := self.most_regex.match(message.clean_content)) is not None: + await self.data.load_sql() + emoji = get_emoji_name(m.group('emoji')) + LOGGER.info(emoji) + + if (most := self.data.most(emoji=emoji)) is not None: + LOGGER.info(f'\n{str(most)}') + if (day_match := re.search('(?P\d+) days', message.content)): + start = (datetime.today() - timedelta(days=int(day_match.group('days')))).astimezone() + valid_dates = most['datetime'] > start + most = most.loc[valid_dates] + + if most.shape[0] > 0: + most = most.iloc[0] + msg = await self.data.fetch_message(self, most) + await message.reply(f'{msg.jump_url}') + LOGGER.info(f'{msg.clean_content}') + LOGGER.info(f' - {msg.author}') + LOGGER.info(f'{most["count"]}x {emoji}') for joke in self.jokes: if (m := joke.scan(message)) is not None: LOGGER.info(f'{joke.__class__.__name__} detected: {message.content}, {m.group()}') await joke.respond(message, self, m) - async def handle_raw_reaction(self, payload: discord.RawReactionActionEvent): - LOGGER.info(payload) - guild = await self.fetch_guild(payload.guild_id) - channel = await guild.fetch_channel(payload.channel_id) - message = await channel.fetch_message(payload.message_id) + # async def handle_raw_reaction(self, payload: RawReactionActionEvent): + # LOGGER.info(payload) + # guild = await self.fetch_guild(payload.guild_id) + # channel = await guild.fetch_channel(payload.channel_id) + # message = await channel.fetch_message(payload.message_id) + # + # if payload.event_type == 'REACTION_REMOVE': + # LOGGER.info(f'{payload.emoji} removed from\n{message.author}: {message.content}') + # elif payload.event_type == 'REACTION_ADD': + # LOGGER.info( + # f'{payload.member.display_name} added {payload.emoji} to\n' + \ + # f'{message.author.display_name}: {message.content}') + # + # if hasattr(self, 'data'): + # await self.data.update_reaction(msg=message) - if payload.event_type == 'REACTION_REMOVE': - LOGGER.info(f'{payload.emoji} removed from\n{message.author}: {message.content}') - elif payload.event_type == 'REACTION_ADD': - LOGGER.info( - f'{payload.member.display_name} added {payload.emoji} to\n' + \ - f'{message.author.display_name}: {message.content}') - - if hasattr(self, 'data'): - await self.data.update_reaction(msg=message) - - async def leaderboard(self, match: re.Match) -> str: - emoji_name = get_emoji_name(match.group('emoji')) - days = match.group('days') or 14 - days = int(days) - counts = await self.data.emoji_user_counts(client=self, - emoji_name=emoji_name, - days=days) - width = max([len(str(s)) for s in counts.index.values]) - res = f'{match.group("emoji")} totals, past {days} days\n' - res += '\n'.join(f"`{str(name).ljust(width + 1)}with {cnt:<2.0f} total`" - for name, cnt in counts.iteritems()) - return res - - async def biggest_single(self, match: re.Match) -> str: - days = match.group('days') or 14 - days = int(days) - data: pd.Series = self.data.emoji_totals( - emoji_name=get_emoji_name(match.group('emoji')), - days=days - ) - user: discord.User = await self.fetch_user(user_id=data.index[0]) - LOGGER.info(f'User: {user.mention}') - msg = f'{user.mention} with {data.iloc[0]:.0f}x {match.group("emoji")} over the past {days} days' - msg += '\n' + await self.worst_offsenses(user=user, days=days, top=3, emoji_str=match.group('emoji')) - return msg - - async def worst_offsenses(self, user: discord.User, emoji_str: str, days: int = None, top: int = 3) -> str: - df: pd.DataFrame = self.data.emoji_messages(get_emoji_name(emoji_str), days=days) - df: pd.DataFrame = df[df['user id'] == user.id].sort_values('count', ascending=False).iloc[:top] - - if df.shape[0] > 0: - res = f'Top {top} {emoji_str}\n' - res += f'\n'.join( - f'{emoji_str}x{row["count"]:.0f}\n{row["link"]}' for idx, row in df.iterrows()) - else: - res = f'No {emoji_str} for {user} in the past {days} days' - - return res + # async def leaderboard(self, match: re.Match) -> str: + # emoji_name = get_emoji_name(match.group('emoji')) + # days = match.group('days') or 14 + # days = int(days) + # counts = await self.data.emoji_user_counts(client=self, + # emoji_name=emoji_name, + # days=days) + # width = max([len(str(s)) for s in counts.index.values]) + # res = f'{match.group("emoji")} totals, past {days} days\n' + # res += '\n'.join(f"`{str(name).ljust(width + 1)}with {cnt:<2.0f} total`" + # for name, cnt in counts.iteritems()) + # return res + # + # async def biggest_single(self, match: re.Match) -> str: + # days = match.group('days') or 14 + # days = int(days) + # data: pd.Series = self.data.emoji_totals( + # emoji_name=get_emoji_name(match.group('emoji')), + # days=days + # ) + # user: User = await self.fetch_user(user_id=data.index[0]) + # LOGGER.info(f'User: {user.mention}') + # msg = f'{user.mention} with {data.iloc[0]:.0f}x {match.group("emoji")} over the past {days} days' + # msg += '\n' + await self.worst_offsenses(user=user, days=days, top=3, emoji_str=match.group('emoji')) + # return msg + # + # async def worst_offsenses(self, user: User, emoji_str: str, days: int = None, top: int = 3) -> str: + # df: pd.DataFrame = self.data.emoji_messages(get_emoji_name(emoji_str), days=days) + # df: pd.DataFrame = df[df['user id'] == user.id].sort_values('count', ascending=False).iloc[:top] + # + # if df.shape[0] > 0: + # res = f'Top {top} {emoji_str}\n' + # res += f'\n'.join( + # f'{emoji_str}x{row["count"]:.0f}\n{row["link"]}' for idx, row in df.iterrows()) + # else: + # res = f'No {emoji_str} for {user} in the past {days} days' + # + # return res def get_emoji_name(string: str) -> str: diff --git a/kwaylon/msg.py b/kwaylon/msg.py index 304ffd7..ba42ab7 100644 --- a/kwaylon/msg.py +++ b/kwaylon/msg.py @@ -1,23 +1,15 @@ import logging from datetime import datetime, timedelta -from typing import Dict, Iterable import pandas as pd -from nextcord import Client, Message -from nextcord import Reaction +from nextcord import Client, Message, Reaction from nextcord import TextChannel from nextcord.utils import AsyncIterator LOGGER = logging.getLogger(__name__) -async def message_df(client: Client, **kwargs): - return pd.DataFrame( - [message_dict(m) async for m in message_gen(client, **kwargs)] - ).set_index('id').sort_values('created', ascending=False) - - -async def message_gen(client: Client, limit=20, days: int = 90, **kwargs) -> AsyncIterator: +async def message_gen(client: Client, limit=20, days: int = 90, **kwargs) -> AsyncIterator[Message]: if 'after' not in kwargs: kwargs['after'] = (datetime.today() - timedelta(days=days)) elif isinstance((after := kwargs.get('after', None)), datetime): @@ -25,6 +17,7 @@ async def message_gen(client: Client, limit=20, days: int = 90, **kwargs) -> Asy kwargs['limit'] = limit + LOGGER.info(kwargs) for channel in client.get_all_channels(): if channel.category is not None and channel.category.name != 'Archive': if isinstance(channel, TextChannel): @@ -41,34 +34,20 @@ async def message_gen(client: Client, limit=20, days: int = 90, **kwargs) -> Asy LOGGER.info(f'Done getting messages') -def message_dict(m: Message) -> Dict: - return { - 'object': m, - 'id': m.id, - 'created': m.created_at.astimezone(), - 'display_name': m.author.display_name, - 'user id': m.author.id, - 'message': m.content, - 'channel': m.channel.name, - 'channel link': m.channel.mention, - 'link': m.jump_url, - } +async def reaction_gen(client: Client, **kwargs) -> AsyncIterator[Reaction]: + async for msg in message_gen(client=client, **kwargs): + for reaction in msg.reactions: + yield { + 'msg_id': reaction.message.id, + 'emoji': reaction.emoji.name if reaction.is_custom_emoji() else reaction.emoji, + 'emoji_id': reaction.emoji.id if reaction.is_custom_emoji() else None, + 'channel_id': msg.channel.id, + 'guild_id': msg.channel.guild.id, + 'auth_id': msg.author.id, + 'count': int(reaction.count), + 'datetime': msg.created_at.astimezone(), + } -def full_reaction_df(msgs: Iterable[Message]): - return pd.concat([reaction_df(msg) for msg in msgs]) - - -def reaction_df(msg: Message): - df = pd.DataFrame([reaction_dict(r) for r in msg.reactions]) - return df.set_index(['msg id', 'emoji']) if not df.empty else df - - -def reaction_dict(r: Reaction) -> Dict: - return { - 'object': r, - 'msg id': r.message.id, - 'emoji': r.emoji.name if r.is_custom_emoji() else r.emoji, - 'emoji id': r.emoji.id if r.is_custom_emoji() else None, - 'count': int(r.count), - } +async def reaction_df(client: Client, **kwargs): + return pd.DataFrame([r async for r in reaction_gen(client=client, **kwargs)]) diff --git a/main.py b/main.py index 1250e2e..1ce5ed9 100644 --- a/main.py +++ b/main.py @@ -10,7 +10,6 @@ if __name__ == '__main__': logging.basicConfig(level=logging.INFO) - # https://discordpy.readthedocs.io/en/stable/quickstart.html client = Kwaylon( # limit=100, # days=10 @@ -36,6 +35,5 @@ if __name__ == '__main__': # async def on_raw_reaction_remove(payload: RawReactionActionEvent): # await client.handle_raw_reaction(payload) - load_dotenv() client.run(os.getenv('DISCORD_TOKEN'))