better reaction storage

This commit is contained in:
jsl12
2022-01-22 01:45:49 -06:00
parent 64e7dc6bb0
commit a956ee7afe
4 changed files with 173 additions and 172 deletions

View File

@@ -3,64 +3,68 @@ import logging
import sqlite3 import sqlite3
from datetime import datetime, timedelta from datetime import datetime, timedelta
from pathlib import Path from pathlib import Path
from typing import Union
import discord
import pandas as pd import pandas as pd
from nextcord import Client, Message
from .msg import message_df, full_reaction_df, message_dict, LOGGER, reaction_df from .msg import LOGGER, reaction_df
LOGGER = logging.getLogger(__name__) LOGGER = logging.getLogger(__name__)
class MsgData: class MsgData:
db_path: Path
msgs: pd.DataFrame msgs: pd.DataFrame
reactions: pd.DataFrame reactions: pd.DataFrame
lock: asyncio.Lock lock: asyncio.Lock
@classmethod def __init__(self, path: Union[str, Path]):
async def create(cls, client: discord.Client, **kwargs):
self = MsgData()
self.lock = asyncio.Lock() self.lock = asyncio.Lock()
self.msgs: pd.DataFrame = await message_df(client, **kwargs) self.db_path: Path = Path(path) if isinstance(path, str) else path
self.msgs = self.msgs.sort_values('created')
self.reactions: pd.DataFrame = full_reaction_df(self.msgs['object'].tolist())
return self
@classmethod @property
def from_sql(cls, db, local_tz='US/Central'): def sql_context(self):
if isinstance(db, (str, Path)): return sqlite3.connect(self.db_path)
con = sqlite3.connect(db)
elif isinstance(db, sqlite3.Connection):
con = db
self = MsgData() async def load_sql(self, local_tz='US/Central'):
self.msgs: pd.DataFrame = pd.read_sql('select * from msgs', con=con, index_col='id') async with self.lock:
self.msgs['created'] = pd.to_datetime(self.msgs['created']).dt.tz_convert(local_tz) with self.sql_context as con:
self.reactions: pd.DataFrame = pd.read_sql('select * from reactions', con).set_index(['msg id', 'emoji']) LOGGER.info(f'Opened {self.db_path.name}')
return self try:
self.reactions = pd.read_sql('select * from reactions', con=con).reset_index()
self.reactions['datetime'] = pd.to_datetime(self.reactions['datetime']).dt.tz_convert(local_tz)
except:
LOGGER.warning(f'failed to read reactions from: {self.db_path.resolve()}')
else:
LOGGER.info(f'read {self.reactions.shape[0]:,} reactions')
con.close()
def to_sql(self, db): async def scan_messages(self, client: Client, **kwargs):
if isinstance(db, (str, Path)): self.reactions = await reaction_df(client, **kwargs)
con = sqlite3.connect(db) async with self.lock:
elif isinstance(db, sqlite3.Connection): with self.sql_context as con:
con = db self.reactions.to_sql(
name='reactions',
con=con,
if_exists='replace',
index=False,
index_label=self.reactions.index.name
)
LOGGER.info(f'wrote {self.reactions.shape[0]:,} into {self.db_path.name}')
def most(self, emoji: str):
matching = self.reactions['emoji'] == emoji
if not matching.any():
LOGGER.info(f'No reactions with {emoji}')
return
else: else:
raise TypeError(f'db argument is not a valid type: {type(db)}') return self.reactions.loc[matching].sort_values('count', ascending=False).reset_index(drop=True)
self.msgs.drop('object', axis=1).to_sql( async def fetch_message(self, client: Client, row: pd.Series):
name='msgs', guild = await client.fetch_guild(row['guild_id'])
con=con, channel = await guild.fetch_channel(row['channel_id'])
if_exists='replace', return await channel.fetch_message(row['msg_id'])
index=True,
index_label=self.msgs.index.name
)
self.reactions.drop('object', axis=1).to_sql(
name='reactions',
con=con,
if_exists='replace',
index=True,
index_label=self.reactions.index.name
)
def __str__(self): def __str__(self):
return str(self.msgs) + '\n\n' + str(self.reactions) return str(self.msgs) + '\n\n' + str(self.reactions)
@@ -74,14 +78,14 @@ class MsgData:
elif isinstance(item, int): elif isinstance(item, int):
return self.reactions.loc[pd.IndexSlice[item, :],].fillna(0).applymap(int) return self.reactions.loc[pd.IndexSlice[item, :],].fillna(0).applymap(int)
async def add_msg(self, message: discord.Message): # async def add_msg(self, message: Message):
async with self.lock: # async with self.lock:
mdict = message_dict(message) # mdict = message_dict(message)
mdict.pop('id') # mdict.pop('id')
self.msgs.loc[message.id] = pd.Series(mdict) # self.msgs.loc[message.id] = pd.Series(mdict)
LOGGER.info(f'Added message id {message.id} from {message.author}: {message.content}') # LOGGER.info(f'Added message id {message.id} from {message.author}: {message.content}')
async def update_reaction(self, msg: discord.Message): async def update_reaction(self, msg: Message):
# Drop all the reactions for this message id, if there are any # Drop all the reactions for this message id, if there are any
try: try:
async with self.lock: async with self.lock:
@@ -96,8 +100,8 @@ class MsgData:
self.reactions = self.reactions.append(new) self.reactions = self.reactions.append(new)
LOGGER.info(str(new.droplevel(level=0, axis=0).loc[:, 'count'])) LOGGER.info(str(new.droplevel(level=0, axis=0).loc[:, 'count']))
if msg.id not in self.msgs.index: # if msg.id not in self.msgs.index:
await self.add_msg(msg) # await self.add_msg(msg)
return new return new
@@ -157,7 +161,7 @@ class MsgData:
f' {type(emoji_name)}:{emoji_name}, {type(days)}:{days}') f' {type(emoji_name)}:{emoji_name}, {type(days)}:{days}')
# return pd.DataFrame() # return pd.DataFrame()
async def emoji_user_counts(self, client: discord.Client, emoji_name: str, days: int = None): async def emoji_user_counts(self, client: Client, emoji_name: str, days: int = None):
"""Creates a Series indexed by user display_name with the number of reactions with emoji_name as values""" """Creates a Series indexed by user display_name with the number of reactions with emoji_name as values"""
counts: pd.Series = self.emoji_totals(emoji_name, days) counts: pd.Series = self.emoji_totals(emoji_name, days)
counts.index = pd.Index([(await client.fetch_user(user_id=uid)).display_name for uid in counts.index]) counts.index = pd.Index([(await client.fetch_user(user_id=uid)).display_name for uid in counts.index])

View File

@@ -1,18 +1,20 @@
import logging import logging
import re import re
from datetime import timedelta, datetime
from pathlib import Path from pathlib import Path
import nextcord as discord import nextcord as discord
import pandas as pd from nextcord import Client, Message, TextChannel
from . import jokes from . import jokes
from .data import MsgData
LIL_STINKY_ID = 704043422276780072 LIL_STINKY_ID = 704043422276780072
LOGGER = logging.getLogger(__name__) LOGGER = logging.getLogger(__name__)
class Kwaylon(discord.Client): class Kwaylon(Client):
db_path: Path = Path('../messages.db') db_path: Path = Path('../messages.db')
def __init__(self, limit: int = 5000, days: int = 30, *args, **kwargs): def __init__(self, limit: int = 5000, days: int = 30, *args, **kwargs):
@@ -20,10 +22,7 @@ class Kwaylon(discord.Client):
self.limit, self.days = limit, days self.limit, self.days = limit, days
self.jokes = list(jokes.collect_jokes()) self.jokes = list(jokes.collect_jokes())
self.most_regex = re.compile( self.most_regex = re.compile('most\s+(?P<emoji>\S+)')
'^who is the most\s+(?P<emoji>\S+)\s*?(?:in the past (?P<days>\d+) days)?\??$',
re.IGNORECASE,
)
self.leaderboard_regex = re.compile( self.leaderboard_regex = re.compile(
'^most\s*?(?P<emoji>\S+?)\s*?(leaderboard|((?:.+?(?P<days>\d+) days)))', '^most\s*?(?P<emoji>\S+?)\s*?(leaderboard|((?:.+?(?P<days>\d+) days)))',
re.IGNORECASE re.IGNORECASE
@@ -31,97 +30,118 @@ class Kwaylon(discord.Client):
async def handle_ready(self):
    """Set up the reaction store once the client is ready.

    Creates the ``MsgData`` store for ``./messages.db``, loads what is saved
    there and, if loading left the store without a ``reactions`` attribute,
    scans messages using this client's ``limit`` and ``days``.
    """

    async def alive():
        # Startup announcement; the call below is currently disabled.
        facility: TextChannel = discord.utils.get(self.get_all_channels(), name='robotics-facility')
        await facility.send('https://tenor.com/view/terminator-im-back-gif-19144173')
        kaylon = discord.utils.get(self.emojis, name='kaylon')
        await facility.send(f"{kaylon}")

    # await alive()

    store = MsgData(path=Path('./messages.db'))
    self.data = store
    await store.load_sql()
    if hasattr(store, 'reactions'):
        return
    await store.scan_messages(client=self, limit=self.limit, days=self.days)
async def handle_message(self, message: Message):
    """Dispatch an incoming message to the reaction commands and the jokes.

    Messages written by the bot itself are ignored. When the reaction store
    (``self.data``) is not available yet, a warning is logged and only the
    jokes run. Otherwise:

    * a message that mentions the bot and contains ``read`` triggers a new
      scan (optionally limited by ``<n> days``) and nothing else;
    * a message matched by ``self.most_regex`` is answered with the jump URL
      of the message carrying the highest count of the requested emoji,
      optionally restricted to the last ``<n> days``.

    Args:
        message: The message that was received.
    """
    if message.author == self.user:
        return

    data = getattr(self, 'data', None)
    if data is None:
        # A message can arrive before handle_ready has created the store.
        LOGGER.warning('No self.data attribute')
    else:
        mentioned = any(mention.id == self.user.id for mention in message.mentions)
        if mentioned and 'read' in message.content:
            if (m := re.search(r'(\d+) days', message.content)):
                days = int(m.group(1))
            else:
                days = self.days

            await data.scan_messages(client=self, limit=self.limit, days=days)
            return

        # if hasattr(self, 'data'):
        #     await self.data.add_msg(message)
        #
        # if (m := self.leaderboard_regex.match(message.content)) is not None:
        #     try:
        #         await message.reply(await self.leaderboard(match=m))
        #     except KeyError as e:
        #         LOGGER.exception(e)
        #         await message.reply(f"I couldn't find any {m.group('emoji')} reactions. Leave me alone!")
        #     return

        if (m := self.most_regex.match(message.clean_content)) is not None:
            # Re-read the database so the answer reflects the latest scan.
            await data.load_sql()
            emoji = get_emoji_name(m.group('emoji'))
            LOGGER.info(emoji)

            if (most := data.most(emoji=emoji)) is not None:
                LOGGER.info(f'\n{str(most)}')
                if (day_match := re.search(r'(?P<days>\d+) days', message.content)):
                    # astimezone() makes the cutoff timezone-aware so it can be
                    # compared with the timezone-aware 'datetime' column.
                    start = (datetime.today() - timedelta(days=int(day_match.group('days')))).astimezone()
                    valid_dates = most['datetime'] > start
                    most = most.loc[valid_dates]

                if most.shape[0] > 0:
                    # Rows are already ordered by count, highest first.
                    most = most.iloc[0]
                    msg = await data.fetch_message(self, most)
                    await message.reply(f'{msg.jump_url}')
                    LOGGER.info(f'{msg.clean_content}')
                    LOGGER.info(f' - {msg.author}')
                    LOGGER.info(f'{most["count"]}x {emoji}')

    for joke in self.jokes:
        if (m := joke.scan(message)) is not None:
            LOGGER.info(f'{joke.__class__.__name__} detected: {message.content}, {m.group()}')
            await joke.respond(message, self, m)
async def handle_raw_reaction(self, payload: discord.RawReactionActionEvent): # async def handle_raw_reaction(self, payload: RawReactionActionEvent):
LOGGER.info(payload) # LOGGER.info(payload)
guild = await self.fetch_guild(payload.guild_id) # guild = await self.fetch_guild(payload.guild_id)
channel = await guild.fetch_channel(payload.channel_id) # channel = await guild.fetch_channel(payload.channel_id)
message = await channel.fetch_message(payload.message_id) # message = await channel.fetch_message(payload.message_id)
#
# if payload.event_type == 'REACTION_REMOVE':
# LOGGER.info(f'{payload.emoji} removed from\n{message.author}: {message.content}')
# elif payload.event_type == 'REACTION_ADD':
# LOGGER.info(
# f'{payload.member.display_name} added {payload.emoji} to\n' + \
# f'{message.author.display_name}: {message.content}')
#
# if hasattr(self, 'data'):
# await self.data.update_reaction(msg=message)
if payload.event_type == 'REACTION_REMOVE': # async def leaderboard(self, match: re.Match) -> str:
LOGGER.info(f'{payload.emoji} removed from\n{message.author}: {message.content}') # emoji_name = get_emoji_name(match.group('emoji'))
elif payload.event_type == 'REACTION_ADD': # days = match.group('days') or 14
LOGGER.info( # days = int(days)
f'{payload.member.display_name} added {payload.emoji} to\n' + \ # counts = await self.data.emoji_user_counts(client=self,
f'{message.author.display_name}: {message.content}') # emoji_name=emoji_name,
# days=days)
if hasattr(self, 'data'): # width = max([len(str(s)) for s in counts.index.values])
await self.data.update_reaction(msg=message) # res = f'{match.group("emoji")} totals, past {days} days\n'
# res += '\n'.join(f"`{str(name).ljust(width + 1)}with {cnt:<2.0f} total`"
async def leaderboard(self, match: re.Match) -> str: # for name, cnt in counts.iteritems())
emoji_name = get_emoji_name(match.group('emoji')) # return res
days = match.group('days') or 14 #
days = int(days) # async def biggest_single(self, match: re.Match) -> str:
counts = await self.data.emoji_user_counts(client=self, # days = match.group('days') or 14
emoji_name=emoji_name, # days = int(days)
days=days) # data: pd.Series = self.data.emoji_totals(
width = max([len(str(s)) for s in counts.index.values]) # emoji_name=get_emoji_name(match.group('emoji')),
res = f'{match.group("emoji")} totals, past {days} days\n' # days=days
res += '\n'.join(f"`{str(name).ljust(width + 1)}with {cnt:<2.0f} total`" # )
for name, cnt in counts.iteritems()) # user: User = await self.fetch_user(user_id=data.index[0])
return res # LOGGER.info(f'User: {user.mention}')
# msg = f'{user.mention} with {data.iloc[0]:.0f}x {match.group("emoji")} over the past {days} days'
async def biggest_single(self, match: re.Match) -> str: # msg += '\n' + await self.worst_offsenses(user=user, days=days, top=3, emoji_str=match.group('emoji'))
days = match.group('days') or 14 # return msg
days = int(days) #
data: pd.Series = self.data.emoji_totals( # async def worst_offsenses(self, user: User, emoji_str: str, days: int = None, top: int = 3) -> str:
emoji_name=get_emoji_name(match.group('emoji')), # df: pd.DataFrame = self.data.emoji_messages(get_emoji_name(emoji_str), days=days)
days=days # df: pd.DataFrame = df[df['user id'] == user.id].sort_values('count', ascending=False).iloc[:top]
) #
user: discord.User = await self.fetch_user(user_id=data.index[0]) # if df.shape[0] > 0:
LOGGER.info(f'User: {user.mention}') # res = f'Top {top} {emoji_str}\n'
msg = f'{user.mention} with {data.iloc[0]:.0f}x {match.group("emoji")} over the past {days} days' # res += f'\n'.join(
msg += '\n' + await self.worst_offsenses(user=user, days=days, top=3, emoji_str=match.group('emoji')) # f'{emoji_str}x{row["count"]:.0f}\n{row["link"]}' for idx, row in df.iterrows())
return msg # else:
# res = f'No {emoji_str} for {user} in the past {days} days'
async def worst_offsenses(self, user: discord.User, emoji_str: str, days: int = None, top: int = 3) -> str: #
df: pd.DataFrame = self.data.emoji_messages(get_emoji_name(emoji_str), days=days) # return res
df: pd.DataFrame = df[df['user id'] == user.id].sort_values('count', ascending=False).iloc[:top]
if df.shape[0] > 0:
res = f'Top {top} {emoji_str}\n'
res += f'\n'.join(
f'{emoji_str}x{row["count"]:.0f}\n{row["link"]}' for idx, row in df.iterrows())
else:
res = f'No {emoji_str} for {user} in the past {days} days'
return res
def get_emoji_name(string: str) -> str: def get_emoji_name(string: str) -> str:

View File

@@ -1,23 +1,15 @@
import logging import logging
from datetime import datetime, timedelta from datetime import datetime, timedelta
from typing import Dict, Iterable
import pandas as pd import pandas as pd
from nextcord import Client, Message from nextcord import Client, Message, Reaction
from nextcord import Reaction
from nextcord import TextChannel from nextcord import TextChannel
from nextcord.utils import AsyncIterator from nextcord.utils import AsyncIterator
LOGGER = logging.getLogger(__name__) LOGGER = logging.getLogger(__name__)
async def message_df(client: Client, **kwargs): async def message_gen(client: Client, limit=20, days: int = 90, **kwargs) -> AsyncIterator[Message]:
return pd.DataFrame(
[message_dict(m) async for m in message_gen(client, **kwargs)]
).set_index('id').sort_values('created', ascending=False)
async def message_gen(client: Client, limit=20, days: int = 90, **kwargs) -> AsyncIterator:
if 'after' not in kwargs: if 'after' not in kwargs:
kwargs['after'] = (datetime.today() - timedelta(days=days)) kwargs['after'] = (datetime.today() - timedelta(days=days))
elif isinstance((after := kwargs.get('after', None)), datetime): elif isinstance((after := kwargs.get('after', None)), datetime):
@@ -25,6 +17,7 @@ async def message_gen(client: Client, limit=20, days: int = 90, **kwargs) -> Asy
kwargs['limit'] = limit kwargs['limit'] = limit
LOGGER.info(kwargs)
for channel in client.get_all_channels(): for channel in client.get_all_channels():
if channel.category is not None and channel.category.name != 'Archive': if channel.category is not None and channel.category.name != 'Archive':
if isinstance(channel, TextChannel): if isinstance(channel, TextChannel):
@@ -41,34 +34,20 @@ async def message_gen(client: Client, limit=20, days: int = 90, **kwargs) -> Asy
LOGGER.info(f'Done getting messages') LOGGER.info(f'Done getting messages')
async def reaction_gen(client: Client, **kwargs) -> AsyncIterator[dict]:
    """Yield one flat record for every reaction on every scanned message.

    Messages come from ``message_gen``, which receives ``client`` and all
    keyword arguments unchanged.

    Yields:
        A dict with the keys ``msg_id``, ``emoji``, ``emoji_id``,
        ``channel_id``, ``guild_id``, ``auth_id``, ``count`` and ``datetime``.
        For custom emoji, ``emoji`` is the emoji's name and ``emoji_id`` its
        id; otherwise ``emoji`` is the reaction's emoji value itself and
        ``emoji_id`` is ``None``.
    """
    async for msg in message_gen(client=client, **kwargs):
        for reaction in msg.reactions:
            custom = reaction.is_custom_emoji()
            emoji = reaction.emoji
            yield {
                'msg_id': reaction.message.id,
                'emoji': emoji.name if custom else emoji,
                'emoji_id': emoji.id if custom else None,
                'channel_id': msg.channel.id,
                'guild_id': msg.channel.guild.id,
                'auth_id': msg.author.id,
                'count': int(reaction.count),
                # astimezone() with no argument converts to the system's local zone.
                'datetime': msg.created_at.astimezone(),
            }
async def reaction_df(client: Client, **kwargs):
    """Collect every record from ``reaction_gen`` into a DataFrame.

    The column list is passed explicitly so that a scan which finds no
    reactions still returns a frame with the expected columns instead of a
    column-less one that breaks later ``frame['emoji']`` lookups.

    Args:
        client: Client forwarded to ``reaction_gen``.
        **kwargs: Forwarded unchanged to ``reaction_gen``.

    Returns:
        A DataFrame with one row per reaction record.
    """
    # Must list the keys of the records reaction_gen yields; a key missing
    # here would be dropped from the frame.
    columns = [
        'msg_id',
        'emoji',
        'emoji_id',
        'channel_id',
        'guild_id',
        'auth_id',
        'count',
        'datetime',
    ]
    records = [record async for record in reaction_gen(client=client, **kwargs)]
    return pd.DataFrame(records, columns=columns)

View File

@@ -10,7 +10,6 @@ if __name__ == '__main__':
logging.basicConfig(level=logging.INFO) logging.basicConfig(level=logging.INFO)
# https://discordpy.readthedocs.io/en/stable/quickstart.html
client = Kwaylon( client = Kwaylon(
# limit=100, # limit=100,
# days=10 # days=10
@@ -36,6 +35,5 @@ if __name__ == '__main__':
# async def on_raw_reaction_remove(payload: RawReactionActionEvent): # async def on_raw_reaction_remove(payload: RawReactionActionEvent):
# await client.handle_raw_reaction(payload) # await client.handle_raw_reaction(payload)
load_dotenv() load_dotenv()
client.run(os.getenv('DISCORD_TOKEN')) client.run(os.getenv('DISCORD_TOKEN'))