Files
kwaylon/msg.py
2021-08-12 10:51:19 -05:00

139 lines
4.6 KiB
Python

import logging
import os
from datetime import datetime, timedelta
from typing import Dict, Iterable
import discord
import pandas as pd
import pandas.errors
from dotenv import load_dotenv
# Module-level logger, named after this module.
LOGGER = logging.getLogger(__name__)
def convert_emoji(emoji):
    """Make an emoji's name ASCII-safe, modifying the object in place.

    If ``emoji.name`` contains any non-ASCII character, the name is replaced
    by its ``unicode-escape`` encoding decoded back to an ASCII string.
    Names that are already pure ASCII are left untouched.

    Args:
        emoji: Any object exposing a writable string ``name`` attribute.

    Returns:
        The same ``emoji`` object that was passed in.
    """
    # str.isascii() is True exactly when encode('ascii') would succeed, so
    # no exception needs to be raised and caught just to test the name.
    if not emoji.name.isascii():
        emoji.name = emoji.name.encode('unicode-escape').decode('ascii')
    return emoji
async def message_df(client: discord.Client, **kwargs):
    """Collect channel messages into a DataFrame.

    Every message yielded by ``message_gen(client, **kwargs)`` is turned into
    one row via ``message_dict``.

    Args:
        client: Client whose channels are scanned.
        **kwargs: Forwarded unchanged to ``message_gen``.

    Returns:
        A DataFrame indexed by the ``'id'`` column and sorted by ``'created'``
        in descending order.
    """
    rows = []
    async for msg in message_gen(client, **kwargs):
        rows.append(message_dict(msg))

    # NOTE(review): if no messages are yielded the frame has no 'id' column,
    # so set_index is expected to raise - confirm callers never hit that.
    frame = pd.DataFrame(rows)
    indexed = frame.set_index('id')
    return indexed.sort_values('created', ascending=False)
def _category_name(channel) -> str:
    """Return the channel's category name, or '' when it has no category."""
    category = channel.category
    return category.name if category is not None else ''


async def message_gen(client: discord.Client, limit=20, days: int = 90, **kwargs):
    """Yield messages from every non-archived text channel of the client.

    Only ``discord.TextChannel`` instances are considered, and channels whose
    category is named ``'Archive'`` are skipped. Channels are visited in
    (category name, channel name) order. A channel without a category is
    treated as having an empty category name instead of raising
    ``AttributeError``.

    Args:
        client: Client providing ``get_all_channels()``.
        limit: Passed as ``limit`` to each channel's ``history`` call.
        days: When ``after`` is not supplied, history starts this many days
            before now.
        **kwargs: Extra keyword arguments forwarded to ``channel.history``.
            A ``datetime`` given as ``after`` has its ``tzinfo`` removed
            before use.

    Yields:
        Each message returned by ``channel.history`` for each channel.
    """
    text_channels = (
        c for c in client.get_all_channels()
        if isinstance(c, discord.TextChannel) and _category_name(c) != 'Archive'
    )
    channels = sorted(text_channels, key=lambda c: (_category_name(c), c.name))

    # The 'after' bound does not depend on the channel, so settle it once
    # instead of on every loop iteration.
    # NOTE(review): datetime.today() is a naive local time and replace()
    # only drops tzinfo without converting - confirm this matches what
    # channel.history expects for 'after'.
    if 'after' not in kwargs:
        kwargs['after'] = datetime.today() - timedelta(days=days)
    elif isinstance(kwargs['after'], datetime):
        kwargs['after'] = kwargs['after'].replace(tzinfo=None)

    for channel in channels:
        LOGGER.info(f'{_category_name(channel)} #{channel.name}')
        async for msg in channel.history(limit=limit, **kwargs):
            yield msg
def message_dict(m: discord.Message) -> Dict:
    """Flatten a message into a plain dict suitable for a DataFrame row.

    Args:
        m: The message to describe.

    Returns:
        A dict holding the message object itself plus its id, creation time,
        author display name and id, content, channel name, a ``<#id>``
        channel mention string, and the jump URL.
    """
    author = m.author
    channel = m.channel

    row = {'object': m, 'id': m.id}
    # NOTE(review): astimezone() on a naive datetime assumes it is local
    # time - confirm created_at is timezone-aware, otherwise values shift.
    row['created'] = m.created_at.astimezone()
    row['display_name'] = author.display_name
    row['user id'] = author.id
    row['message'] = m.content
    row['channel'] = channel.name
    row['channel link'] = f'<#{channel.id}>'
    row['link'] = m.jump_url
    return row
async def reaction_df(msgs: Iterable[discord.Message]):
    """Build a DataFrame of reaction counts for the given messages.

    Messages without reactions are skipped. The per-message frames produced
    by ``reaction_series`` are concatenated and indexed by
    ``('msg id', 'emoji')``.

    Args:
        msgs: Messages whose reactions should be collected.

    Returns:
        A DataFrame indexed by ``('msg id', 'emoji')``. When no message has
        any reaction, an empty frame with the same index names and the
        ``'emoji id'`` / ``'count'`` columns is returned.
    """
    frames = [await reaction_series(msg) for msg in msgs if len(msg.reactions) > 0]

    if not frames:
        # pd.concat raises ValueError on an empty list; hand back an empty
        # frame with the usual layout so index-based lookups still work.
        empty_index = pd.MultiIndex.from_arrays([[], []], names=['msg id', 'emoji'])
        return pd.DataFrame(columns=['emoji id', 'count'], index=empty_index)

    return pd.concat(frames).set_index(['msg id', 'emoji'])
async def reaction_series(msg: discord.Message):
    """Return one DataFrame row per reaction on ``msg``.

    Args:
        msg: The message whose reactions are described.

    Returns:
        A DataFrame built from ``reaction_dict`` for each reaction, or
        ``None`` when the message has no reactions.
    """
    if len(msg.reactions) == 0:
        return None

    rows = []
    for reaction in msg.reactions:
        rows.append(await reaction_dict(reaction))
    return pd.DataFrame(rows)
async def reaction_dict(r: discord.Reaction) -> Dict:
    """Describe a single reaction as a plain dict.

    Args:
        r: The reaction to describe.

    Returns:
        A dict with the id of the reacted message, the emoji name, the emoji
        id and the reaction count as an ``int``. When ``r.emoji`` is a
        ``discord.Emoji`` or ``discord.PartialEmoji`` its ``name`` and ``id``
        are used; otherwise the emoji value is unicode-escaped to ASCII and
        the id is ``None``.
    """
    emoji = r.emoji
    if isinstance(emoji, (discord.Emoji, discord.PartialEmoji)):
        emoji_name = emoji.name
        emoji_id = emoji.id
    else:
        emoji_name = emoji.encode('unicode-escape').decode('ascii')
        emoji_id = None

    return {
        'msg id': r.message.id,
        'emoji': emoji_name,
        'emoji id': emoji_id,
        'count': int(r.count),
    }
def emoji_messages(msg_df, react_df, emoji_name: str, days: int = 10) -> pd.DataFrame:
    """Return recent messages that received a given emoji, most reacted first.

    Args:
        msg_df: Messages indexed by message id, with a ``'created'`` column.
        react_df: Reactions with a two-level index (message id, emoji name)
            and a ``'count'`` column.
        emoji_name: Emoji name to look up in the second index level.
        days: Only messages created within this many days are kept.

    Returns:
        The matching rows of ``msg_df`` with an added ``'count'`` column,
        sorted by ``'count'`` descending. ``None`` is returned, after logging
        an error, when the emoji is unknown, when no message matches, or when
        the counts cannot be looked up.
    """
    cached_emojis = react_df.index.get_level_values(1).drop_duplicates().values
    if emoji_name not in cached_emojis:
        LOGGER.error(f'Emoji not found in reactions DataFrame: {emoji_name}')
        return None

    reactions = react_df.loc[pd.IndexSlice[:, emoji_name], :]
    # Copy so that adding the 'count' column below cannot touch msg_df.
    reacted_msgs = msg_df.loc[reactions.index.get_level_values(0).to_list()].copy()
    if reacted_msgs.shape[0] == 0:
        LOGGER.error(f'No messages found with {emoji_name} reactions')
        return None

    LOGGER.info(
        f'Found {reacted_msgs.shape[0]} messages for the leaderboard, '
        f'{reactions["count"].sum():.0f} reactions total')

    try:
        reacted_msgs['count'] = reacted_msgs.index.to_series().apply(
            lambda idx: reactions.loc[pd.IndexSlice[idx, emoji_name], 'count'])
    except pandas.errors.InvalidIndexError as e:
        LOGGER.error(f'{e}\n{reacted_msgs[reacted_msgs.index.duplicated()]}')
        # Without a 'count' column the sort below would fail with a KeyError
        # that hides the problem just logged, so stop here instead.
        return None

    cutoff = (datetime.today() - timedelta(days=days)).astimezone()
    recent = reacted_msgs[reacted_msgs['created'] >= cutoff]
    return recent.sort_values('count', ascending=False)
def emoji_totals(edf: pd.DataFrame) -> pd.DataFrame:
    """Summarise reaction counts per user.

    Args:
        edf: Frame with at least ``'display_name'``, ``'channel'`` and
            ``'count'`` columns. Any other columns are ignored.

    Returns:
        A DataFrame indexed by ``display_name`` with two columns:
        ``'total'`` (sum of ``count`` as integers) and ``'max channel'``
        (the channel holding that user's largest per-channel sum), sorted by
        ``'total'`` descending.
    """
    # Select 'count' before aggregating: summing the whole frame first also
    # tries to add up unrelated columns (timestamps, objects, text), which
    # is wasted work and fails on pandas versions that do not silently drop
    # non-summable columns.
    totals = (
        edf.groupby('display_name')['count']
        .sum()
        .sort_values(ascending=False)
        .apply(int)
    )
    max_channels = (
        edf.groupby(['display_name', 'channel'])['count']
        .sum()
        .sort_values(ascending=False)
        .groupby(level=0)
        # idxmax yields a (display_name, channel) pair; keep the channel.
        .apply(lambda per_channel: per_channel.idxmax()[1])
    )
    return pd.DataFrame({
        'total': totals,
        'max channel': max_channels,
    }).sort_values('total', ascending=False)
if __name__ == '__main__':
    # Manual smoke test: connect to Discord and report once the client is ready.
    client = discord.Client()
    logging.basicConfig(level=logging.INFO)

    @client.event
    async def on_ready():
        print(f'{client.user} has connected to Discord!')

    load_dotenv()
    token = os.getenv('DISCORD_TOKEN')
    if not token:
        # Fail with a clear message instead of passing None to client.run.
        raise SystemExit('DISCORD_TOKEN is not set; define it in the environment or a .env file')
    client.run(token)