Ed Helper#
This notebook is meant to assist with Between-Class Participation grading. To use:
1. Download the Discussion data:
   - Open analytics
   - Download the Threads JSON
2. Adjust the filename and dates below
3. Run all cells in the notebook
4. Review the student contributions at the bottom
import pandas as pd
# Name of the Threads JSON export to grade.
FILENAME = "FILENAME.json"

# Grading window (intended to be inclusive). Each Timestamp below is midnight
# at the start of the given day in the timezone named here.
_TIMEZONE = "US/Eastern"
START = pd.Timestamp(year=2024, month=4, day=17, tz=_TIMEZONE)
END = pd.Timestamp(year=2024, month=4, day=23, tz=_TIMEZONE)
Load data#
import json
from pathlib import Path
import pandas as pd
# The export is looked up one directory above the current working directory.
path = Path("..", FILENAME)
# Read through a context manager so the file handle is closed promptly, and
# decode explicitly as UTF-8 instead of relying on the platform default.
with path.open(encoding="utf-8") as export:
    data = json.load(export)
# Flatten the records into a table; nested objects such as `user` become
# dotted columns (user.name, user.email, user.role).
threads = pd.json_normalize(data)
# threads
threads.info()
<class 'pandas.core.frame.DataFrame'>
RangeIndex: 97 entries, 0 to 96
Data columns (total 21 columns):
# Column Non-Null Count Dtype
--- ------ -------------- -----
0 url 97 non-null object
1 type 97 non-null object
2 number 97 non-null int64
3 title 97 non-null object
4 category 97 non-null object
5 subcategory 97 non-null object
6 subsubcategory 97 non-null object
7 votes 97 non-null int64
8 views 97 non-null int64
9 unique_views 97 non-null int64
10 private 97 non-null bool
11 anonymous 97 non-null bool
12 endorsed 97 non-null bool
13 created_at 97 non-null object
14 text 97 non-null object
15 document 97 non-null object
16 comments 97 non-null object
17 user.name 97 non-null object
18 user.email 97 non-null object
19 user.role 97 non-null object
20 answers 64 non-null object
dtypes: bool(3), int64(4), object(14)
memory usage: 14.0+ KB
Include replies#
The JSON data includes replies (comments and answers) nested under each post.
# Each thread row holds its replies as lists of dicts. explode() yields one
# entry per reply and dropna() discards threads that have none, so each frame
# below has one row per reply.
# NOTE(review): only the first level of comments/answers is flattened here; if
# the export nests replies further (e.g. comments on an answer), those are not
# included - confirm against the data.
comments = pd.json_normalize(threads["comments"].explode().dropna())
# comments
replies = pd.json_normalize(threads["answers"].explode().dropna())
# replies
# Stack threads and both kinds of replies into a single table of posts.
posts = pd.concat([threads, comments, replies]).reset_index()
# posts
# Parse with utc=True so the column is always a timezone-aware datetime dtype,
# even when the strings carry different UTC offsets (e.g. either side of a
# daylight-saving change). Comparisons against START/END are unaffected,
# because they compare instants in time, not wall-clock values.
posts["created_at"] = pd.to_datetime(posts["created_at"], utc=True)
# posts["created_at"]
Filter#
# START and END are midnight timestamps, so `<= END` would keep only the very
# first instant of the END date. To make the END date inclusive, compare
# against the start of the following day instead. DateOffset moves by one
# calendar day, so the bound stays at local midnight across DST changes.
end_exclusive = END + pd.DateOffset(days=1)
in_window = (posts["created_at"] >= START) & (posts["created_at"] < end_exclusive)
output = posts[in_window]
# Sanity check: earliest and latest timestamps that made it into the window.
print(output["created_at"].min())
print(output["created_at"].max())
2024-04-17 20:59:02.151711+10:00
2024-04-23 13:19:26.484477+10:00
Prep output#
# exclude the instructors
output = output[output["user.role"] != "admin"]
# sort by name, then by time within each person's posts
output = output.sort_values(["user.name", "created_at"])
# only include a subset of the columns; copy so the assignment below modifies
# an independent frame rather than a slice of `posts`
output = output[
    [
        "user.name",
        "url",
        # "created_at",
        # "title",
        "text",
    ]
].copy()
# make links clickable
# https://stackoverflow.com/a/20043785/358804
output["url"] = output["url"].apply(lambda url: f'<a href="{url}">Open</a>')
# The Styler emits cell contents as raw HTML (which the links above rely on),
# so escape the student-authored columns; otherwise "<", ">" or "&" in a post
# would be interpreted as markup instead of being shown.
# render newlines
# https://stackoverflow.com/a/56881411/358804
styled = output.style.format(
    escape="html",
    subset=["user.name", "text"],
).set_properties(
    **{
        "text-align": "left",
        "white-space": "pre-wrap",
    }
)
Output#
from IPython.display import HTML

# Render the styled table to an HTML string, then wrap it so the notebook
# displays it as rich output (the cell's final expression is what gets shown).
rendered = styled.to_html(escape=False)
HTML(rendered)