Lecture 4 demo solution#
Ensure the visualizations render properly across VSCode, Jupyter Book, etc. You can ignore this.
import plotly.io as pio
pio.renderers.default = "colab+notebook_connected+plotly_mimetype"
import pandas as pd
entries = pd.read_csv(
"https://data.ny.gov/resource/t6yz-b64h.csv?$limit=300000&$order=toll_10_minute_block",
parse_dates=["toll_date", "toll_hour", "toll_10_minute_block", "toll_week"],
date_format="ISO8601",
)
entries
| toll_date | toll_hour | toll_10_minute_block | minute_of_hour | hour_of_day | day_of_week_int | day_of_week | toll_week | time_period | vehicle_class | detection_group | detection_region | crz_entries | excluded_roadway_entries | |
|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
| 0 | 2025-01-05 | 2025-01-05 00:00:00 | 2025-01-05 00:00:00 | 0 | 0 | 1 | Sunday | 2025-01-05 | Overnight | 1 - Cars, Pickups and Vans | Brooklyn Bridge | Brooklyn | 98 | 95 |
| 1 | 2025-01-05 | 2025-01-05 00:00:00 | 2025-01-05 00:00:00 | 0 | 0 | 1 | Sunday | 2025-01-05 | Overnight | 1 - Cars, Pickups and Vans | Hugh L. Carey Tunnel | Brooklyn | 17 | 22 |
| 2 | 2025-01-05 | 2025-01-05 00:00:00 | 2025-01-05 00:00:00 | 0 | 0 | 1 | Sunday | 2025-01-05 | Overnight | 1 - Cars, Pickups and Vans | Manhattan Bridge | Brooklyn | 96 | 0 |
| 3 | 2025-01-05 | 2025-01-05 00:00:00 | 2025-01-05 00:00:00 | 0 | 0 | 1 | Sunday | 2025-01-05 | Overnight | 1 - Cars, Pickups and Vans | Williamsburg Bridge | Brooklyn | 138 | 0 |
| 4 | 2025-01-05 | 2025-01-05 00:00:00 | 2025-01-05 00:00:00 | 0 | 0 | 1 | Sunday | 2025-01-05 | Overnight | 1 - Cars, Pickups and Vans | East 60th St | East 60th St | 208 | 0 |
| ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... |
| 299995 | 2025-02-02 | 2025-02-02 22:00:00 | 2025-02-02 22:20:00 | 20 | 22 | 1 | Sunday | 2025-02-02 | Overnight | 4 - Buses | Lincoln Tunnel | New Jersey | 1 | 0 |
| 299996 | 2025-02-02 | 2025-02-02 22:00:00 | 2025-02-02 22:20:00 | 20 | 22 | 1 | Sunday | 2025-02-02 | Overnight | 4 - Buses | Queens Midtown Tunnel | Queens | 3 | 0 |
| 299997 | 2025-02-02 | 2025-02-02 22:00:00 | 2025-02-02 22:20:00 | 20 | 22 | 1 | Sunday | 2025-02-02 | Overnight | 4 - Buses | Queensboro Bridge | Queens | 3 | 0 |
| 299998 | 2025-02-02 | 2025-02-02 22:00:00 | 2025-02-02 22:20:00 | 20 | 22 | 1 | Sunday | 2025-02-02 | Overnight | 4 - Buses | West 60th St | West 60th St | 5 | 0 |
| 299999 | 2025-02-02 | 2025-02-02 22:00:00 | 2025-02-02 22:20:00 | 20 | 22 | 1 | Sunday | 2025-02-02 | Overnight | 4 - Buses | West Side Highway at 60th St | West Side Highway | 0 | 0 |
300000 rows × 14 columns
Only keep columns that are needed:
entries_minimal = entries[["toll_10_minute_block", "vehicle_class", "crz_entries"]]
entries_minimal
| toll_10_minute_block | vehicle_class | crz_entries | |
|---|---|---|---|
| 0 | 2025-01-05 00:00:00 | 1 - Cars, Pickups and Vans | 98 |
| 1 | 2025-01-05 00:00:00 | 1 - Cars, Pickups and Vans | 17 |
| 2 | 2025-01-05 00:00:00 | 1 - Cars, Pickups and Vans | 96 |
| 3 | 2025-01-05 00:00:00 | 1 - Cars, Pickups and Vans | 138 |
| 4 | 2025-01-05 00:00:00 | 1 - Cars, Pickups and Vans | 208 |
| ... | ... | ... | ... |
| 299995 | 2025-02-02 22:20:00 | 4 - Buses | 1 |
| 299996 | 2025-02-02 22:20:00 | 4 - Buses | 3 |
| 299997 | 2025-02-02 22:20:00 | 4 - Buses | 3 |
| 299998 | 2025-02-02 22:20:00 | 4 - Buses | 5 |
| 299999 | 2025-02-02 22:20:00 | 4 - Buses | 0 |
300000 rows × 3 columns
entries_by_10_min = (
entries_minimal.groupby("toll_10_minute_block").sum(numeric_only=True).reset_index()
)
entries_by_10_min
| toll_10_minute_block | crz_entries | |
|---|---|---|
| 0 | 2025-01-05 00:00:00 | 2709 |
| 1 | 2025-01-05 00:10:00 | 2645 |
| 2 | 2025-01-05 00:20:00 | 2564 |
| 3 | 2025-01-05 00:30:00 | 2404 |
| 4 | 2025-01-05 00:40:00 | 2405 |
| ... | ... | ... |
| 4162 | 2025-02-02 21:40:00 | 2818 |
| 4163 | 2025-02-02 21:50:00 | 2768 |
| 4164 | 2025-02-02 22:00:00 | 2395 |
| 4165 | 2025-02-02 22:10:00 | 2542 |
| 4166 | 2025-02-02 22:20:00 | 1520 |
4167 rows × 2 columns
import plotly.express as px
fig = px.line(
entries_by_10_min,
x="toll_10_minute_block",
y="crz_entries",
title="Entries to the Congestion Relief Zone",
)
fig.show()
Resampling#
weekly_entries = entries_minimal.resample("W-SUN", on="toll_10_minute_block").sum()
weekly_entries
| vehicle_class | crz_entries | |
|---|---|---|
| toll_10_minute_block | ||
| 2025-01-05 | 1 - Cars, Pickups and Vans1 - Cars, Pickups an... | 417890 |
| 2025-01-12 | 1 - Cars, Pickups and Vans1 - Cars, Pickups an... | 3248903 |
| 2025-01-19 | 1 - Cars, Pickups and Vans1 - Cars, Pickups an... | 3278921 |
| 2025-01-26 | 1 - Cars, Pickups and Vans1 - Cars, Pickups an... | 3311347 |
| 2025-02-02 | 1 - Cars, Pickups and Vans1 - Cars, Pickups an... | 3375468 |
weekly_entries.index.name = "date"
weekly_entries
| vehicle_class | crz_entries | |
|---|---|---|
| date | ||
| 2025-01-05 | 1 - Cars, Pickups and Vans1 - Cars, Pickups an... | 417890 |
| 2025-01-12 | 1 - Cars, Pickups and Vans1 - Cars, Pickups an... | 3248903 |
| 2025-01-19 | 1 - Cars, Pickups and Vans1 - Cars, Pickups an... | 3278921 |
| 2025-01-26 | 1 - Cars, Pickups and Vans1 - Cars, Pickups an... | 3311347 |
| 2025-02-02 | 1 - Cars, Pickups and Vans1 - Cars, Pickups an... | 3375468 |
fig = px.line(
weekly_entries,
# x="date", # not needed, since it's the index
y="crz_entries",
title="Weekly vehicle entries into the Congestion Relief Zone",
)
fig.show()
Rolling average#
entries_by_10_min_rolling = entries_by_10_min.rolling(
"1h", on="toll_10_minute_block"
).mean()
fig = px.line(
entries_by_10_min_rolling,
x="toll_10_minute_block",
y="crz_entries",
title="One-hour rolling average of Congestion Relief Zone entries",
)
fig.show()
Let’s try a bigger window:
entries_by_10_min_rolling = entries_by_10_min.rolling(
"3h", on="toll_10_minute_block"
).mean()
fig = px.line(
entries_by_10_min_rolling,
x="toll_10_minute_block",
y="crz_entries",
title="Three-hour rolling average of Congestion Relief Zone entries",
)
fig.show()
Resampling with other grouping#
entries_by_loc = entries_minimal.groupby(
[
pd.Grouper(key="toll_10_minute_block", freq="D"),
"vehicle_class",
]
).sum()
entries_by_loc
| crz_entries | ||
|---|---|---|
| toll_10_minute_block | vehicle_class | |
| 2025-01-05 | 1 - Cars, Pickups and Vans | 253836 |
| 2 - Single-Unit Trucks | 5254 | |
| 3 - Multi-Unit Trucks | 615 | |
| 4 - Buses | 3480 | |
| 5 - Motorcycles | 1486 | |
| ... | ... | ... |
| 2025-02-02 | 2 - Single-Unit Trucks | 5228 |
| 3 - Multi-Unit Trucks | 428 | |
| 4 - Buses | 3847 | |
| 5 - Motorcycles | 1263 | |
| TLC Taxi/FHV | 162552 |
174 rows × 1 columns
entries_by_loc = entries_by_loc.reset_index()
entries_by_loc = entries_by_loc.rename(columns={"toll_10_minute_block": "date"})
fig = px.line(
entries_by_loc,
x="date",
y="crz_entries",
color="vehicle_class",
title="Three-hour rolling average of Congestion Relief Zone entries",
)
fig.show()
Filtering timestamps#
start = entries["toll_10_minute_block"].min()
start
Timestamp('2025-01-05 00:00:00')
end = start + pd.Timedelta(1, "w")
end
Timestamp('2025-01-12 00:00:00')
entries_first_week = entries[entries["toll_10_minute_block"] < end]
entries_first_week["crz_entries"].sum()
np.int64(3246763)