143 lines
4.8 KiB
Python
143 lines
4.8 KiB
Python
import logging
|
|
import os
|
|
from datetime import datetime, timedelta
|
|
from typing import Dict, Iterable
|
|
|
|
import discord
|
|
import pandas as pd
|
|
import pandas.errors
|
|
from dotenv import load_dotenv
|
|
|
|
# Module-level logger, named after this module, used by the helpers below.
LOGGER = logging.getLogger(__name__)
|
|
|
|
|
|
def convert_emoji(emoji):
    """Make ``emoji.name`` ASCII-safe, escaping it in place when needed.

    A name that contains any non-ASCII character is replaced by its
    ``unicode-escape`` representation decoded back to ``str``; a name that
    is already pure ASCII is left untouched.

    Args:
        emoji: Any object exposing a writable string ``name`` attribute.

    Returns:
        The same ``emoji`` object that was passed in (mutated in place when
        escaping was applied).
    """
    name = emoji.name
    # ``str.isascii`` is the direct form of "would encode('ascii') succeed?",
    # so no exception has to be raised and caught just to probe the name.
    if not name.isascii():
        emoji.name = name.encode('unicode-escape').decode('ascii')
    return emoji
|
|
|
|
|
|
async def message_df(client: discord.Client, **kwargs):
    """Collect messages into a DataFrame indexed by message id.

    Every message yielded by ``message_gen(client, **kwargs)`` is converted
    to a row with ``message_dict``.

    Args:
        client: Discord client passed straight through to ``message_gen``.
        **kwargs: Forwarded unchanged to ``message_gen``.

    Returns:
        A DataFrame indexed by ``id`` and ordered by ``created``, newest
        first.
    """
    rows = []
    async for msg in message_gen(client, **kwargs):
        rows.append(message_dict(msg))

    frame = pd.DataFrame(rows)
    indexed = frame.set_index('id')
    return indexed.sort_values('created', ascending=False)
|
|
|
|
|
|
async def message_gen(client: discord.Client, limit=20, days: int = 90, **kwargs):
    """Yield messages from every text channel that is not archived.

    Channels are visited in ``(category name, channel name)`` order; text
    channels whose category is named ``'Archive'`` are skipped. Text
    channels that belong to no category are included and sort first.

    Args:
        client: Discord client whose ``get_all_channels()`` is enumerated.
        limit: Passed as ``limit`` to each channel's ``history()`` call, so
            it applies per channel, not overall.
        days: When no ``after`` keyword is given, only messages newer than
            ``datetime.today() - days`` are requested.
        **kwargs: Forwarded to ``channel.history()``. A ``datetime`` passed
            as ``after`` has its tzinfo stripped before forwarding.

    Yields:
        Each message produced by ``channel.history()``, channel by channel.
    """
    def category_name(channel):
        # A channel outside any category has ``category`` set to None;
        # treat that as an empty name instead of raising AttributeError.
        category = channel.category
        return category.name if category is not None else ''

    channels = sorted(
        (
            c for c in client.get_all_channels()
            if isinstance(c, discord.TextChannel) and category_name(c) != 'Archive'
        ),
        key=lambda c: (category_name(c), c.name),
    )

    # The lower time bound does not depend on the channel, so settle it once.
    if 'after' not in kwargs:
        kwargs['after'] = datetime.today() - timedelta(days=days)
    elif isinstance((after := kwargs['after']), datetime):
        # NOTE(review): this drops tzinfo without converting the wall-clock
        # value; confirm which zone the installed discord.py assumes for
        # naive datetimes.
        kwargs['after'] = after.replace(tzinfo=None)

    for channel in channels:
        LOGGER.info(f'{category_name(channel)} #{channel.name}')
        async for msg in channel.history(limit=limit, **kwargs):
            yield msg
|
|
|
|
|
|
def message_dict(m: discord.Message) -> Dict:
    """Flatten a message into a plain dict suitable for a DataFrame row.

    Args:
        m: The message to describe. Its ``id``, ``created_at``, ``author``,
            ``content``, ``channel`` and ``jump_url`` attributes are read.

    Returns:
        A dict with the keys ``object``, ``id``, ``created``,
        ``display_name``, ``user id``, ``message``, ``channel``,
        ``channel link`` and ``link``, in that order. ``object`` is the
        message itself and ``created`` is ``created_at`` passed through
        ``astimezone()`` with no argument.
    """
    author = m.author
    channel = m.channel

    record = {'object': m, 'id': m.id}
    record['created'] = m.created_at.astimezone()
    record['display_name'] = author.display_name
    record['user id'] = author.id
    record['message'] = m.content
    record['channel'] = channel.name
    # Channel mention markup built from the channel id.
    record['channel link'] = f'<#{channel.id}>'
    record['link'] = m.jump_url
    return record
|
|
|
|
|
|
async def reaction_df(msgs: Iterable[discord.Message]):
    """Build one reactions DataFrame covering all given messages.

    Messages without reactions are skipped; each remaining message
    contributes the frame returned by ``reaction_series``.

    Args:
        msgs: Messages to inspect; each must expose ``reactions``.

    Returns:
        A DataFrame indexed by ``('msg id', 'emoji')`` with the columns
        ``emoji id`` and ``count``. When no message has any reaction the
        frame is empty but keeps that same index and column layout.
    """
    frames = [await reaction_series(msg) for msg in msgs if msg.reactions]
    if not frames:
        # pd.concat raises ValueError on an empty list, so hand back an
        # empty frame with the layout callers expect instead.
        return pd.DataFrame(
            columns=['msg id', 'emoji', 'emoji id', 'count'],
        ).set_index(['msg id', 'emoji'])
    return pd.concat(frames).set_index(['msg id', 'emoji'])
|
|
|
|
|
|
async def reaction_series(msg: discord.Message):
    """Describe the reactions on a single message as a DataFrame.

    Args:
        msg: Message whose ``reactions`` are converted with
            ``reaction_dict``.

    Returns:
        A DataFrame with one row per reaction, or ``None`` when the message
        has no reactions.
    """
    if len(msg.reactions) == 0:
        return None

    rows = []
    for reaction in msg.reactions:
        rows.append(await reaction_dict(reaction))
    return pd.DataFrame(rows)
|
|
|
|
|
|
async def reaction_dict(r: discord.Reaction) -> Dict:
    """Flatten a reaction into a plain dict suitable for a DataFrame row.

    Args:
        r: Reaction to describe; ``r.emoji``, ``r.message.id`` and
            ``r.count`` are read.

    Returns:
        A dict with the keys ``msg id``, ``emoji``, ``emoji id`` and
        ``count``. For ``discord.Emoji`` / ``discord.PartialEmoji`` values
        the emoji's own ``name`` and ``id`` are used; any other value is
        unicode-escaped to ASCII and gets ``None`` as its id.
    """
    emoji = r.emoji
    if isinstance(emoji, (discord.Emoji, discord.PartialEmoji)):
        emoji_name = emoji.name
        emoji_id = emoji.id
    else:
        # Presumably a raw unicode emoji string; it is escaped to ASCII.
        emoji_name = emoji.encode('unicode-escape').decode('ascii')
        emoji_id = None

    return {
        'msg id': r.message.id,
        'emoji': emoji_name,
        'emoji id': emoji_id,
        'count': int(r.count),
    }
|
|
|
|
|
|
def emoji_messages(msg_df, react_df, emoji_name: str, days: int = 10) -> pd.DataFrame:
    """Select recent messages that received a given emoji reaction.

    Args:
        msg_df: Messages frame indexed by message id with a ``created``
            column.
        react_df: Reactions frame indexed by ``(msg id, emoji)`` with a
            ``count`` column.
        emoji_name: Emoji to look for in the second index level of
            ``react_df``.
        days: Only messages created within this many days of now are kept.

    Returns:
        The matching rows of ``msg_df`` with an added ``count`` column,
        sorted by ``count`` descending; ``None`` (after logging an error)
        when ``emoji_name`` does not occur in ``react_df``.

    Raises:
        pandas.errors.InvalidIndexError: Logged and re-raised if it occurs
            while looking up the per-message counts.
    """
    known_emojis = react_df.index.get_level_values(1).drop_duplicates().values
    if emoji_name not in known_emojis:
        LOGGER.error(f'Emoji not found in reactions DataFrame: {emoji_name}')
        return None

    reactions = react_df.loc[pd.IndexSlice[:, emoji_name], :]
    msg_ids = reactions.index.get_level_values(0).to_list()
    reacted_msgs = msg_df.loc[msg_ids]
    # Keep the first row per message id, then order by id.
    reacted_msgs = reacted_msgs[~reacted_msgs.index.duplicated()].sort_index()

    found = reacted_msgs.shape[0]
    if found == 0:
        LOGGER.error(f'No messages found with {emoji_name} reactions')
    else:
        LOGGER.info(
            f'Found {found} messages for the leaderboard, '
            f'{reactions["count"].sum():.0f} reactions total'
        )

    def count_for(msg_id):
        # Reaction count recorded for this message and emoji.
        return reactions.loc[pd.IndexSlice[msg_id, emoji_name], 'count']

    try:
        reacted_msgs['count'] = reacted_msgs.index.to_series().apply(count_for)
    except pandas.errors.InvalidIndexError as e:
        LOGGER.error(f'{e}\n{reacted_msgs[reacted_msgs.index.duplicated()]}')
        raise

    cutoff = (datetime.today() - timedelta(days=days)).astimezone()
    recent = reacted_msgs[reacted_msgs['created'] >= cutoff]
    return recent.sort_values('count', ascending=False)
|
|
|
|
|
|
def emoji_totals(edf: pd.DataFrame) -> pd.DataFrame:
    """Summarise reaction counts per author.

    Args:
        edf: Frame with at least the columns ``display_name``, ``channel``
            and ``count``. Any other columns are ignored.

    Returns:
        A DataFrame indexed by ``display_name`` with two columns: ``total``
        (the author's summed ``count`` as an int) and ``max channel`` (the
        channel where that author's summed ``count`` is largest), sorted by
        ``total`` descending.
    """
    # Select 'count' before aggregating: summing the whole frame would also
    # try to add up unrelated columns (timestamps, text, objects), which is
    # wasted work at best and a TypeError at worst.
    totals = (
        edf
        .groupby('display_name')['count']
        .sum()
        .sort_values(ascending=False)
        .apply(int)
    )
    per_channel = (
        edf
        .groupby(['display_name', 'channel'])['count']
        .sum()
        .sort_values(ascending=False)
    )
    # idxmax yields a (display_name, channel) tuple; keep only the channel.
    max_channels = per_channel.groupby(level=0).apply(lambda counts: counts.idxmax()[1])

    return pd.DataFrame({
        'total': totals,
        'max channel': max_channels,
    }).sort_values('total', ascending=False)
|
|
|
|
|
|
if __name__ == '__main__':
    # Manual connectivity check: log in with the token taken from the
    # environment and announce when the client is ready.
    logging.basicConfig(level=logging.INFO)

    load_dotenv()
    token = os.getenv('DISCORD_TOKEN')
    if not token:
        # Stop with a clear message rather than passing None to client.run().
        raise SystemExit('DISCORD_TOKEN is not set; export it or add it to a .env file')

    client = discord.Client()

    @client.event
    async def on_ready():
        print(f'{client.user} has connected to Discord!')

    client.run(token)
|