Sandbox

# Sandbox

## How to Use This Notebook

This notebook allows you to:

  1. Select an air pollutant (e.g., `o3`, `no2`, `pm10`) by setting the variable `pollutant`.

  2. Download data for a specific monitoring station in the UK by setting the variable `station_id`.

  3. Choose a variable (column) from that station’s data to visualize by setting the variable `variable_to_plot`.

  4. Plot the station’s location on a map of the UK, and plot the selected variable over time.

The code will also print out a list of all the options for each stage for you to choose from.

import pandas as pd
import matplotlib.pyplot as plt
from environmental_insights import data as ei_data

# ─── USER CONFIGURATION ───
# Air pollutant to query, e.g. "no2", "pm10", etc.
pollutant = "o3"

# Look up the monitoring stations for the chosen pollutant.
stations = ei_data.get_uk_monitoring_stations(pollutant)

# Print a heading followed by one bullet per station.
heading = f"The monitoring stations for the air pollutant {pollutant} are:"
print(heading)
for station_name in stations:
    print(f"- {station_name}")
import pandas as pd
import matplotlib.pyplot as plt
from environmental_insights import data as ei_data

# ─── USER CONFIGURATION ───
# Station identifier; words are separated by underscores.
station_id = "Stoke-on-Trent_Centre"

# ─── STEP 1: Download the station’s data ───
station_gdf = ei_data.get_uk_monitoring_station(pollutant=pollutant, station=station_id)

# Stop early with a clear message when nothing usable came back.
download_failed = station_gdf is None or station_gdf.empty
if download_failed:
    raise ValueError(f"No data returned for {station_id} ({pollutant})")

# If the CRS is missing, assume EPSG:3995. set_crs only tags the existing
# coordinates with that CRS; it does not reproject them.
crs_missing = station_gdf.crs is None
if crs_missing:
    station_gdf.set_crs(epsg=3995, inplace=True)
    


# ─── STEP 2: Plot the station location on a simple map with UK outline ───
uk_outline_gdf = ei_data.get_uk_grids_outline()
if uk_outline_gdf.crs is None:
    # Same fallback as for the station data: tag untagged coordinates as EPSG:3995.
    uk_outline_gdf.set_crs(epsg=3995, inplace=True)

# Reproject both layers to WGS84 (EPSG:4326) so the axes really are degrees of
# longitude/latitude, matching the axis labels below. The reprojected copies
# are used for this map only; station_gdf and uk_outline_gdf keep their CRS.
uk_outline_wgs84 = uk_outline_gdf.to_crs(epsg=4326)
station_wgs84 = station_gdf.to_crs(epsg=4326)

# Human-readable station name, shared by the legend entry and the title.
station_label = station_id.replace("_", " ").title()

fig, ax = plt.subplots(figsize=(5, 5))
uk_outline_wgs84.plot(
    ax=ax,
    facecolor="none",
    edgecolor="black",
    linewidth=0.8,
    alpha=0.7,
)
station_wgs84.plot(
    ax=ax,
    markersize=80,
    color="red",
    edgecolor="black",
    alpha=0.8,
    label=station_label,
)

ax.set_title(f"{pollutant.upper()} Monitoring: {station_label}", fontsize=12)
ax.set_xlabel("Longitude")
ax.set_ylabel("Latitude")
# No explicit ax.set_aspect("equal") here: for longitude/latitude data
# geopandas picks a latitude-corrected aspect itself, whereas forcing "equal"
# would stretch the map east-west at UK latitudes.
ax.legend(loc="upper right", fontsize=9)
plt.tight_layout()
plt.show()
# List every column of the station data, one bullet per column name.
print("The columns in this DataFrame are:")
for column_name in station_gdf.columns:
    print(f"- {column_name}")
# ─── USER CONFIGURATION ───
# Column to plot: any name from the column list printed above,
# e.g. "NAEI_SNAP_6_NMVOC".
variable_to_plot = "U_Component_of_Wind_100m"

# Human-readable station name for the plot title.
station_label = station_id.replace("_", " ").title()

# Marker appearance for the scatter plot.
marker_style = {
    "s": 1,             # marker size in points² (adjust as needed)
    "c": "black",       # marker color
    "linewidths": 0.2,  # width of each marker's edge
    "alpha": 0.8,       # slight transparency where points overlap heavily
}

fig, ax = plt.subplots(figsize=(10, 6))

# One marker per row: the "Timestamp" column on x, the chosen column on y.
timestamps = station_gdf["Timestamp"]
values = station_gdf[variable_to_plot]
ax.scatter(timestamps, values, **marker_style)

ax.set(
    title=f"{variable_to_plot} at {station_label} ({pollutant.upper()})",
    xlabel="Timestamp",
    ylabel=variable_to_plot,
)
plt.tight_layout()
plt.show()