# reddit_scripts/mod_transparency_report.py at master · prettyoaktree/reddit_scripts · GitHub

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
############################################
# Moderation Transparency Report Generator #
############################################
"""
This is the code I use to generate the mod transparency reports for r/Orangetheory.
Examples of these reports can be found here: https://www.reddit.com/r/orangetheory/wiki/mod-transparency-reports/
The code also updates a wiki page with links to posted reports.

Please note: this has not been thoroughly tested with other subs and it's possible there are lots of bugs here!
"""

import json
from datetime import datetime, tzinfo
from dateutil.tz import tzutc
import pandas as pd
import praw

# Settings and secrets
# --------------------
# All credentials are read from the JSON file named by CONFIG_FILE. The name is handed
# to open() as a relative path, so the file is looked up in the current working directory.
# The values could be typed in here directly, but they are secrets: keep them out of the source.
# The Reddit account needs permission to edit wiki pages for the wiki update at the end
# of this script to succeed.
CONFIG_FILE = 'local_config.json'
with open(CONFIG_FILE) as config_file:
    config = json.load(config_file)

# Pull out every required setting; a missing key raises KeyError immediately.
(
    monitored_subreddit,
    reddit_username,
    reddit_password,
    reddit_user_agent,
    reddit_client_id,
    reddit_client_secret,
) = (
    config[setting_name]
    for setting_name in (
        'monitored_subreddit',
        'reddit_username',
        'reddit_password',
        'reddit_user_agent',
        'reddit_client_id',
        'reddit_client_secret',
    )
)

# Enter the range of modlog data to retrieve.
# Both bounds are used inclusively (earliest_dt <= timestamp <= latest_dt), so latest_dt
# has to be the last instant of the final day. A bare date would mean midnight at the
# START of that day and would silently drop nearly all of the day's mod actions.
earliest_dt = datetime(year=2022, month=7, day=1, tzinfo=tzutc())
latest_dt = datetime(
    year=2022, month=7, day=31,
    hour=23, minute=59, second=59, microsecond=999999,
    tzinfo=tzutc(),
)

# Start the Reddit post (markdown): the title and the opening sentence are both
# labelled with the month and year of latest_dt.
report_period = latest_dt.strftime('%B %Y')
post_title = f"Moderation Transparency Report for {report_period}"
post_body_md = f"This report provides a summary of actions taken by the moderators of r/{monitored_subreddit.title()} during {report_period}.\n"

# Open the Reddit client used to read the moderation log
with praw.Reddit(
    user_agent=reddit_user_agent,
    client_id=reddit_client_id,
    client_secret=reddit_client_secret,
    username=reddit_username,
    password=reddit_password,
) as reddit:

    # Build one record per target item (post or comment), keyed by its fullname,
    # instead of one record per mod action. Example: a post that was removed, given a
    # removal reason and later approved should count once, as approved.
    # The first approve/remove entry seen for an item decides its outcome; any further
    # approve/remove entries for the same item are ignored.
    # NOTE(review): presumably the log is iterated newest-first, which would make the
    # first entry seen the most recent decision -- confirm against the PRAW docs.
    REVIEW_ACTIONS = ('approvelink', 'approvecomment', 'removelink', 'removecomment')
    report_data = {}
    for item in reddit.subreddit(monitored_subreddit).mod.log(limit=None):
        item_created_dt = datetime.fromtimestamp(item.created_utc, tz=tzutc())
        if not (earliest_dt <= item_created_dt <= latest_dt):
            continue  # outside the reporting window

        fullname = item.target_fullname
        record = report_data.get(fullname, {})

        if item.action == 'addremovalreason':
            # The reason is stored even when no approve/remove entry was seen for the item.
            record['removal_reason'] = item.description
            report_data[fullname] = record
            continue

        if item.action not in REVIEW_ACTIONS or record.get('type') is not None:
            continue  # irrelevant action, or the item's outcome is already recorded

        # Fullnames starting with the 't1' prefix are treated as comments, everything else as posts.
        if fullname.partition('_')[0] == 't1':
            record['type'] = 'comment'
        else:
            record['type'] = 'post'
        if item.action.startswith('approve'):
            record['mod_action'] = 'approve'
        else:
            record['mod_action'] = 'remove'
        record['date_time'] = item_created_dt.strftime('%Y/%m/%d')
        report_data[fullname] = record

def _count_items_by(frame, keys):
    """Return a pivot table counting the rows of *frame* for each combination of *keys*.

    Rows are counted through the 'date_time' column; empty cells are filled with 0.
    """
    return pd.pivot_table(
        data=frame,
        index=keys,
        values='date_time',
        aggfunc='count',
        fill_value=0,
    )


# Turn the collected records into a table with one row per item
print('Creating report...')
df_report = pd.DataFrame(report_data).T

# Item counts per (type, mod_action)
summary_data = _count_items_by(df_report, ['type', 'mod_action'])
# Item counts per (type, mod_action, removal_reason)
removal_reason_data = _count_items_by(df_report, ['type', 'mod_action', 'removal_reason'])

def _removal_section_md(summary_table, reason_table, item_type, heading, reviewed_phrase):
    """Build the markdown section that summarises removals for one item type.

    Args:
        summary_table: pivot table whose 'date_time' column holds item counts
            indexed by (type, mod_action).
        reason_table: pivot table whose 'date_time' column holds item counts
            indexed by (type, mod_action, removal_reason).
        item_type: 'post' or 'comment'.
        heading: markdown heading line of the section, without the trailing newline.
        reviewed_phrase: text placed between the total count and ", of which ...".

    Returns:
        The section text, or '' when the summary has no rows for item_type.
    """
    summary_counts = summary_table['date_time']
    if item_type not in summary_counts:
        return ''

    by_action = summary_counts[item_type]
    # 'chaos_mode' is kept from the original lookup; nothing visible in this script
    # stores that value in 'mod_action', so it normally contributes 0.
    approved = by_action.get('approve', 0) + by_action.get('chaos_mode', 0)
    removed = by_action.get('remove', 0)
    total = approved + removed
    if total == 0:
        return ''

    # The reason table has no ('<type>', 'remove') rows when nothing of this type was
    # removed, or when no removed item has a recorded reason. Indexing it directly
    # used to abort the whole report with a KeyError in those cases.
    try:
        reasons = reason_table['date_time'][item_type]['remove']
    except KeyError:
        reasons = None

    section = (
        f"{heading}\n"
        f"A total of **{total}** {reviewed_phrase}, of which **{removed}** "
        f"({int(round(removed / total * 100, 0))}%) were removed"
    )
    if reasons is None or removed == 0:
        # Nothing to break down: close the sentence instead of announcing an empty list.
        return section + '.  \n'

    section += ' for the following reasons:  \n'
    for removal_reason, count in reasons.sort_values(ascending=False).items():
        section += f"- {removal_reason}: **{count}** ({int(round(count / removed * 100, 0))}%)  \n"
    return section


# Add the post and comment removal summaries to the Reddit post
post_body_md += _removal_section_md(
    summary_data,
    removal_reason_data,
    'post',
    '## Post Removals',
    'posts were reviewed by the moderators',
)
post_body_md += _removal_section_md(
    summary_data,
    removal_reason_data,
    'comment',
    '## Comment Removals',
    'comments were reported to the moderators by community users or by SplatBot',
)


# Collect the bans issued inside the reporting window.
# NOTE(review): `reddit` is the client bound by the earlier `with praw.Reddit(...)` block,
# which has already exited at this point -- confirm the client stays usable after that.
bans = []
for item in reddit.subreddit(monitored_subreddit).mod.log(action='banuser', limit=None):
    item_dt = datetime.fromtimestamp(item.created_utc, tz=tzutc())
    if not (earliest_dt <= item_dt <= latest_dt):
        continue

    # The reason is the part of the log entry's description before the first ':'.
    # A ban logged without a description used to crash on None.split(); such bans
    # (and ones with an empty reason) are now reported under an explicit label.
    description = item.description or ''
    bans.append({
        'timestamp': item.created_utc,
        'reason': description.split(':')[0] or 'Unspecified',
        'duration': item.details,
    })

# The bans section is only written when at least one ban was collected
if bans:
    # Reasons containing '/u/' (per the original note, these come from BotDefense)
    # are collapsed into a single label so they form one row of the table.
    for ban_record in bans:
        if '/u/' in ban_record['reason']:
            ban_record['reason'] = 'Unauthorized bot'

    # Cross-tabulate the bans: one row per reason, one column per duration
    df_bans = pd.DataFrame(bans)
    pivot_options = {
        'index': ['reason'],
        'columns': ['duration'],
        'values': 'timestamp',
        'aggfunc': 'count',
        'fill_value': 0,
    }
    bans_summary_data = pd.pivot_table(data=df_bans, **pivot_options)

    # Section heading and intro sentence, followed by the table rendered as markdown
    bans_intro_md = (
        '## Bans\n'
        f"A total of **{len(bans)}** bans were issued by the moderators. The table below breaks them down by reason and duration:  \n\n"
    )
    post_body_md = post_body_md + bans_intro_md + bans_summary_data.to_markdown()


# Sign-off: invite feedback and link to the wiki page where reports are archived
closing_parts = [
    '\n\n',
    'We hope you find this information useful and we welcome your feedback. ',
    f"All reports are archived on our [wiki](https://www.reddit.com/r/{monitored_subreddit}/wiki/mod-transparency-reports).  \n\n",
    '-The Modsquad',
]
post_body_md = post_body_md + ''.join(closing_parts)

# Flair template applied to the report post. The default is the id this script has
# always used; set "post_flair_id" in the config file to use another template
# (NOTE(review): flair template ids are presumably specific to one subreddit, so other
# subs will need their own value -- confirm).
DEFAULT_POST_FLAIR_ID = '161cdb20-1a7d-11e8-affb-0e5c7ea2a678'
post_flair_id = config.get('post_flair_id', DEFAULT_POST_FLAIR_ID)

# Wiki page that collects links to all published reports
WIKI_PAGE_NAME = 'mod-transparency-reports'

# Connect to Reddit
with praw.Reddit(
    client_secret=reddit_client_secret,
    client_id=reddit_client_id,
    username=reddit_username,
    password=reddit_password,
    user_agent=reddit_user_agent
) as reddit:
    subreddit = reddit.subreddit(monitored_subreddit)

    # Post to Reddit (the body is echoed to stdout first) and distinguish it as a mod post
    print('Submitting post...')
    print(f"\n{post_body_md}")
    new_post = subreddit.submit(title=post_title, selftext=post_body_md, flair_id=post_flair_id)
    new_post.mod.distinguish()
    print('... Done')

    # Append a link to the new post at the end of the wiki archive page.
    # Keyword arguments are used because positional arguments to edit() are
    # deprecated in recent PRAW releases.
    print('Updating wiki...')
    wikipage = reddit.subreddit(monitored_subreddit).wiki[WIKI_PAGE_NAME]
    report_link_md = f"\n- [{latest_dt.strftime('%B %Y')}](https://reddit.com{new_post.permalink})"
    updated_content_md = wikipage.content_md + report_link_md
    wikipage.edit(content=updated_content_md, reason=f"Add {post_title}")
    print('... All done.')