Ed Helper

Contents

Ed Helper#

This notebook is meant to assist with Between-Class Participation grading. To use:

Download the Discussion data
1. Go to Ed
2. Open analytics
3. Download the Threads JSON
Make a copy of this notebook
Upload the data
Adjust the filename and dates below
Run all cells in the notebook
Review the student contributions at the bottom

import pandas as pd

# Name of the Threads JSON export downloaded from Ed.
FILENAME = "FILENAME.json"
# First and last day of the grading window; each value is a midnight
# timestamp in the US/Eastern time zone.
START = pd.Timestamp("2024-04-17", tz="US/Eastern")
END = pd.Timestamp("2024-04-23", tz="US/Eastern")

Load data#

import json
from pathlib import Path
import pandas as pd

# The export is expected one directory above the notebook's working directory.
path = Path("..", FILENAME)

# Read inside a context manager so the file handle is closed as soon as the
# JSON is parsed, and decode explicitly as UTF-8 instead of relying on the
# platform's default encoding.
with path.open(encoding="utf-8") as export_file:
    data = json.load(export_file)

# Flatten nested objects into dotted column names (e.g. "user.name").
threads = pd.json_normalize(data)
# threads

# Print the column names, dtypes, and non-null counts as a sanity check.
threads.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 97 entries, 0 to 96
Data columns (total 21 columns):
 #   Column          Non-Null Count  Dtype 
---  ------          --------------  ----- 
 url             97 non-null     object
 type            97 non-null     object
 number          97 non-null     int64 
 title           97 non-null     object
 category        97 non-null     object
 subcategory     97 non-null     object
 subsubcategory  97 non-null     object
 votes           97 non-null     int64 
 views           97 non-null     int64 
 unique_views    97 non-null     int64 
private         97 non-null     bool  
anonymous       97 non-null     bool  
endorsed        97 non-null     bool  
created_at      97 non-null     object
text            97 non-null     object
document        97 non-null     object
comments        97 non-null     object
user.name       97 non-null     object
user.email      97 non-null     object
user.role       97 non-null     object
answers         64 non-null     object
dtypes: bool(3), int64(4), object(14)
memory usage: 14.0+ KB

Include replies#

The JSON data includes replies (comments and answers) nested under each post.

# Turn each thread's "comments" entry into one row per comment: explode the
# per-thread collections, drop threads that have none, and flatten the
# remaining records into columns.
# NOTE(review): only the first level of nesting is expanded here; if comments
# or answers carry their own nested replies, those are not included -- confirm
# against the export.
comments = pd.json_normalize(threads["comments"].explode().dropna())
# comments

# Same treatment for "answers".
replies = pd.json_normalize(threads["answers"].explode().dropna())
# replies

# Stack threads, comments, and answers into a single table of posts.
# reset_index() keeps each row's previous position in an "index" column.
posts = pd.concat([threads, comments, replies]).reset_index()
# posts

# Parse the timestamps as timezone-aware values normalized to UTC. With
# utc=True, rows whose UTC offsets differ (e.g. either side of a daylight
# saving change) still parse into one datetime column rather than falling back
# to object dtype or failing on newer pandas. Comparisons against the
# tz-aware START/END are unaffected by the display time zone.
posts["created_at"] = pd.to_datetime(posts["created_at"], utc=True)
# posts["created_at"]

Filter#

# END is a midnight timestamp, so `created_at <= END` would drop everything
# posted during the END date itself. To make the end date inclusive, compare
# against the start of the following calendar day (exclusive). DateOffset
# moves by calendar day in END's own time zone, so the bound stays at local
# midnight even across a daylight saving change.
window_end = END.normalize() + pd.DateOffset(days=1)
in_window = (posts["created_at"] >= START) & (posts["created_at"] < window_end)
output = posts[in_window]

# Show the earliest and latest timestamps that were kept, as a sanity check.
print(output["created_at"].min())
print(output["created_at"].max())

2024-04-17 20:59:02.151711+10:00
2024-04-23 13:19:26.484477+10:00

Prep output#

import html

# exclude the instructors
output = output[output["user.role"] != "admin"]

# sort by name, then chronologically within each student
output = output.sort_values(["user.name", "created_at"])

# only include a subset of the columns; copy so the column assignments below
# modify an independent frame instead of a slice of `posts`
# (avoids SettingWithCopyWarning)
output = output[
    [
        "user.name",
        "url",
        # "created_at",
        # "title",
        "text",
    ]
].copy()

# The table is rendered as raw HTML, and names/post text come from users, so
# escape every displayed column except "url" (which is turned into markup
# below). astype(str) keeps missing values rendering as text instead of
# raising inside html.escape.
for column in output.columns:
    if column != "url":
        output[column] = output[column].astype(str).map(html.escape)

# make links clickable (the href value is escaped as well)
# https://stackoverflow.com/a/20043785/358804
output["url"] = output["url"].apply(
    lambda url: f'<a href="{html.escape(str(url))}">Open</a>'
)

# render newlines
# https://stackoverflow.com/a/56881411/358804
styled = output.style.set_properties(
    **{
        "text-align": "left",
        "white-space": "pre-wrap",
    }
)

Output#

from IPython.display import HTML

# Serialize the styled table to an HTML string, then wrap it in an HTML
# display object; as the last expression in the cell it is shown as rich
# output.
table_html = styled.to_html(escape=False)
HTML(table_html)