Particulate Matter (PM) data network workflows
Kyle Messier, with assistance from GitHub Copilot
2026-05-20
Source: vignettes/pm_data_workflow.Rmd

This article demonstrates compact PM monitoring workflows for AQS and
IMPROVE. Because these measurements are typically used as outcome data,
the example uses download_aqs() /
process_aqs() and download_improve() /
process_improve() directly rather than routing through
calculate_covariates().
This vignette runs its live workflow when rendered locally. The
download, processing, and plotting chunks are skipped automatically on
CI, CRAN checks, and pkgdown builds; set
AMADEUS_RUN_VIGNETTES=true to force live execution in those
environments.
Available inputs and data availability
download_aqs() and download_improve()
expose the key availability choices for PM network downloads:
- parameter_code selects the EPA pollutant parameter. Common examples documented in amadeus include PM2.5 (88101 and 88502), PM10 (81102), ozone (44201), NO2 (42602), SO2 (42401), and CO (42101).
- resolution_temporal currently supports only "daily", so the downloadable files are daily monitor observations rather than hourly or annual summaries.
- year accepts a single year or a start/end pair, downloading one pre-generated EPA archive per year requested.
- AQS downloads arrive as zipped annual files and can be unzipped automatically with unzip = TRUE; the workflow then reads the extracted daily CSV files.
- IMPROVE supports product = "raw", "rhr2", or "rhr3"; this workflow uses "raw" and filters to ParamCode == "FPM" (fine particulate mass, PM2.5 indicator).
- Neither AQS nor IMPROVE requires authentication, and both are intended for outcome modeling workflows.
Download PM2.5 and NO2 daily AQS data
# Destination for the AQS downloads, placed under the session temp directory.
aqs_dir <- file.path(tempdir(), "aqs_workflow")

# Request the 2022 daily file once for every row of `aqs_parameters`.
for (param_row in seq_len(nrow(aqs_parameters))) {
  download_aqs(
    directory_to_save = aqs_dir,
    parameter_code = aqs_parameters$parameter_code[param_row],
    year = 2022,
    resolution_temporal = "daily",
    acknowledgement = TRUE,
    # Unzip the archive but keep the zip file after extraction.
    unzip = TRUE,
    remove_zip = FALSE
  )
}

Process PM2.5 and NO2 with process_aqs()
# For each row of `aqs_parameters`, read the extracted 2022 daily CSV twice:
# once in "location" mode as an sf object and once in "available-data" mode
# as a data.table. Both results are tagged with the pollutant label and
# returned together as a two-element list.
aqs_processed <- lapply(seq_len(nrow(aqs_parameters)), function(row_idx) {
  pollutant_label <- aqs_parameters$pollutant[row_idx]
  daily_csv <- file.path(
    aqs_dir,
    "data_files",
    sprintf("daily_%s_2022.csv", aqs_parameters$parameter_code[row_idx])
  )

  site_locations <- process_aqs(
    path = daily_csv,
    date = aqs_date_window,
    mode = "location",
    return_format = "sf"
  )
  site_locations$pollutant <- pollutant_label

  observations <- process_aqs(
    path = daily_csv,
    date = aqs_date_window,
    mode = "available-data",
    return_format = "data.table"
  )
  # `:=` adds the label column by reference to the data.table.
  observations[, pollutant := pollutant_label]

  list(locations = site_locations, daily = observations)
})

Plot monitor locations with facets for PM2.5 and NO2
# Stack the per-pollutant results: the "locations" elements via rbind() and the
# "daily" tables via rbindlist(); fill = TRUE pads columns that are missing
# from one of the tables with NA.
aqs_locations <- do.call(
  rbind,
  lapply(aqs_processed, function(result) result[["locations"]])
)
aqs_time_series <- data.table::rbindlist(
  lapply(aqs_processed, function(result) result[["daily"]]),
  fill = TRUE
)
# Map the AQS monitor locations, one facet per pollutant.
ggplot2::ggplot() +
  ggplot2::geom_sf(
    data = aqs_locations,
    color = "#0072B2",
    size = 0.8,
    alpha = 0.8
  ) +
  ggplot2::facet_wrap(ggplot2::vars(pollutant), ncol = 2) +
  # datum = NA turns off the graticule lines and labels.
  ggplot2::coord_sf(datum = NA) +
  ggplot2::labs(
    title = "AQS monitor locations for PM2.5 and NO2",
    subtitle = paste(aqs_date_window[1], "to", aqs_date_window[2]),
    x = NULL,
    y = NULL
  ) +
  ggplot2::theme_minimal() +
  ggplot2::theme(panel.spacing = grid::unit(1, "lines"))

Show an example time series of the downloaded AQS data
# Convert the `time` column to Date by reference so it works on a date axis.
aqs_time_series[, time := as.Date(time)]

# Count rows per pollutant/site, sort so the site with the most rows comes
# first within each pollutant (ties broken by site_id), and keep that first
# row for every pollutant.
site_counts <- aqs_time_series[, .N, by = c("pollutant", "site_id")]
site_counts <- site_counts[order(pollutant, -N, site_id)]
example_sites <- site_counts[, .SD[1], by = pollutant]

# Keep only the observations from the chosen example sites. Note that this
# overwrites `aqs_time_series` with the filtered subset.
aqs_time_series <- merge(
  aqs_time_series,
  example_sites[, c("pollutant", "site_id")],
  by = c("pollutant", "site_id")
)
# Plot the daily series of the example sites, one facet per pollutant, each
# facet with its own y-axis scale.
ggplot2::ggplot(
  data = aqs_time_series,
  mapping = ggplot2::aes(x = time, y = Arithmetic.Mean, group = site_id)
) +
  ggplot2::geom_line(color = "#D55E00", linewidth = 0.5) +
  ggplot2::geom_point(color = "#D55E00", size = 0.8) +
  ggplot2::facet_wrap(
    ggplot2::vars(pollutant),
    scales = "free_y",
    ncol = 2
  ) +
  ggplot2::scale_x_date(date_breaks = "2 weeks", date_labels = "%b %d") +
  ggplot2::labs(
    title = "Example AQS daily time series",
    subtitle = "One site per pollutant, chosen from monitors with available observations in this date window",
    x = "Date",
    y = "Arithmetic mean"
  ) +
  ggplot2::theme_minimal() +
  ggplot2::theme(
    panel.spacing = grid::unit(1, "lines"),
    # Rotate the date labels so the two-week breaks do not overlap.
    axis.text.x = ggplot2::element_text(angle = 45, hjust = 1)
  )

Download IMPROVE daily PM2.5 data (FPM, raw product)
# Destination for the IMPROVE download, placed under the session temp
# directory.
improve_dir <- file.path(tempdir(), "improve_workflow")

# Fetch the 2022 file for the first product listed in `improve_parameters`.
download_improve(
  directory_to_save = improve_dir,
  product = improve_parameters$product[1],
  year = 2022,
  acknowledgement = TRUE
)

Process IMPROVE PM2.5 with process_improve()
# Read the downloaded IMPROVE records for the date window as a data.table.
improve_all <- process_improve(
  path = improve_dir,
  date = improve_date_window,
  product = improve_parameters$product[1],
  return_format = "data.table"
)

# Keep rows that match the selected parameter code and have Status "V0".
improve_pm25 <- improve_all[
  ParamCode == improve_parameters$parameter_code[1] &
    Status == "V0"
]
# Read the same IMPROVE data again, this time as an sf object for mapping.
improve_locations_raw <- process_improve(
  path = improve_dir,
  date = improve_date_window,
  product = improve_parameters$product[1],
  return_format = "sf"
)

# Apply the same parameter/status filter as the tabular data, select the
# site-identifying columns, and drop duplicated rows.
is_selected_row <-
  improve_locations_raw$ParamCode == improve_parameters$parameter_code[1] &
  improve_locations_raw$Status == "V0"
improve_locations <- unique(
  improve_locations_raw[is_selected_row, ][
    ,
    c("SiteCode", "ParamCode", "Status")
  ]
)

Plot IMPROVE monitor locations
# Map the filtered IMPROVE monitor locations.
ggplot2::ggplot() +
  ggplot2::geom_sf(
    data = improve_locations,
    color = "#009E73",
    size = 1,
    alpha = 0.8
  ) +
  # datum = NA turns off the graticule lines and labels.
  ggplot2::coord_sf(datum = NA) +
  ggplot2::labs(
    title = "IMPROVE PM2.5 (FPM) monitor locations",
    subtitle = paste(improve_date_window[1], "to", improve_date_window[2]),
    x = NULL,
    y = NULL
  ) +
  ggplot2::theme_minimal()

Show an example IMPROVE PM2.5 time series
# Convert `FactDate` to Date by reference so it works on a date axis.
improve_pm25[, FactDate := as.Date(FactDate)]

# Pick the site with the most rows (ties broken by SiteCode order).
improve_site_counts <- improve_pm25[, .N, by = "SiteCode"]
improve_site_counts <- improve_site_counts[order(-N, SiteCode)]
example_improve_site <- improve_site_counts[1, SiteCode]

# Observations for the chosen example site only.
improve_example_ts <- improve_pm25[SiteCode == example_improve_site]
# Plot the FactValue series of the example IMPROVE site against FactDate.
ggplot2::ggplot(
  data = improve_example_ts,
  mapping = ggplot2::aes(x = FactDate, y = FactValue, group = SiteCode)
) +
  ggplot2::geom_line(color = "#0072B2", linewidth = 0.5) +
  ggplot2::geom_point(color = "#0072B2", size = 0.8) +
  ggplot2::scale_x_date(date_breaks = "2 weeks", date_labels = "%b %d") +
  ggplot2::labs(
    title = "Example IMPROVE PM2.5 (FPM) time series",
    subtitle = paste("Site:", example_improve_site),
    x = "Date",
    y = "PM2.5 mass concentration (ug/m^3)"
  ) +
  ggplot2::theme_minimal() +
  ggplot2::theme(
    # Rotate the date labels so the two-week breaks do not overlap.
    axis.text.x = ggplot2::element_text(angle = 45, hjust = 1)
  )