Lecture 4 demo solution#

import pandas as pd

entries = pd.read_csv(
    "https://data.ny.gov/resource/t6yz-b64h.csv?$limit=300000&$order=toll_10_minute_block",
    parse_dates=["toll_date", "toll_hour", "toll_10_minute_block", "toll_week"],
    date_format="ISO8601",
)
entries
toll_date toll_hour toll_10_minute_block minute_of_hour hour_of_day day_of_week_int day_of_week toll_week time_period vehicle_class detection_group detection_region crz_entries excluded_roadway_entries
0 2025-01-05 2025-01-05 00:00:00 2025-01-05 00:00:00 0 0 1 Sunday 2025-01-05 Overnight 1 - Cars, Pickups and Vans Brooklyn Bridge Brooklyn 98 95
1 2025-01-05 2025-01-05 00:00:00 2025-01-05 00:00:00 0 0 1 Sunday 2025-01-05 Overnight 1 - Cars, Pickups and Vans Hugh L. Carey Tunnel Brooklyn 17 22
2 2025-01-05 2025-01-05 00:00:00 2025-01-05 00:00:00 0 0 1 Sunday 2025-01-05 Overnight 1 - Cars, Pickups and Vans Manhattan Bridge Brooklyn 96 0
3 2025-01-05 2025-01-05 00:00:00 2025-01-05 00:00:00 0 0 1 Sunday 2025-01-05 Overnight 1 - Cars, Pickups and Vans Williamsburg Bridge Brooklyn 138 0
4 2025-01-05 2025-01-05 00:00:00 2025-01-05 00:00:00 0 0 1 Sunday 2025-01-05 Overnight 1 - Cars, Pickups and Vans East 60th St East 60th St 208 0
... ... ... ... ... ... ... ... ... ... ... ... ... ... ...
299995 2025-02-02 2025-02-02 22:00:00 2025-02-02 22:20:00 20 22 1 Sunday 2025-02-02 Overnight 4 - Buses Lincoln Tunnel New Jersey 1 0
299996 2025-02-02 2025-02-02 22:00:00 2025-02-02 22:20:00 20 22 1 Sunday 2025-02-02 Overnight 4 - Buses Queens Midtown Tunnel Queens 3 0
299997 2025-02-02 2025-02-02 22:00:00 2025-02-02 22:20:00 20 22 1 Sunday 2025-02-02 Overnight 4 - Buses Queensboro Bridge Queens 3 0
299998 2025-02-02 2025-02-02 22:00:00 2025-02-02 22:20:00 20 22 1 Sunday 2025-02-02 Overnight 4 - Buses West 60th St West 60th St 5 0
299999 2025-02-02 2025-02-02 22:00:00 2025-02-02 22:20:00 20 22 1 Sunday 2025-02-02 Overnight 4 - Buses West Side Highway at 60th St West Side Highway 0 0

300000 rows × 14 columns

Only keep columns that are needed:

entries_minimal = entries[["toll_10_minute_block", "vehicle_class", "crz_entries"]]
entries_minimal
toll_10_minute_block vehicle_class crz_entries
0 2025-01-05 00:00:00 1 - Cars, Pickups and Vans 98
1 2025-01-05 00:00:00 1 - Cars, Pickups and Vans 17
2 2025-01-05 00:00:00 1 - Cars, Pickups and Vans 96
3 2025-01-05 00:00:00 1 - Cars, Pickups and Vans 138
4 2025-01-05 00:00:00 1 - Cars, Pickups and Vans 208
... ... ... ...
299995 2025-02-02 22:20:00 4 - Buses 1
299996 2025-02-02 22:20:00 4 - Buses 3
299997 2025-02-02 22:20:00 4 - Buses 3
299998 2025-02-02 22:20:00 4 - Buses 5
299999 2025-02-02 22:20:00 4 - Buses 0

300000 rows × 3 columns

entries_by_10_min = (
    entries_minimal.groupby("toll_10_minute_block").sum(numeric_only=True).reset_index()
)
entries_by_10_min
toll_10_minute_block crz_entries
0 2025-01-05 00:00:00 2713
1 2025-01-05 00:10:00 2646
2 2025-01-05 00:20:00 2564
3 2025-01-05 00:30:00 2405
4 2025-01-05 00:40:00 2405
... ... ...
4162 2025-02-02 21:40:00 2823
4163 2025-02-02 21:50:00 2768
4164 2025-02-02 22:00:00 2395
4165 2025-02-02 22:10:00 2542
4166 2025-02-02 22:20:00 1520

4167 rows × 2 columns

import plotly.express as px

fig = px.line(
    entries_by_10_min,
    x="toll_10_minute_block",
    y="crz_entries",
    title="Entries to the Congestion Relief Zone",
)
fig.show()