this function is developed to avoid
random errors in compressing and decompressing R function objects
with qs::qsave
and qs::qread
. If you encounter such errors, please use
this function with function name strings to save and load the function
objects.
Note
The function name string must include two colons ::
.
Also, the package preceding the two colons should be loaded in the
current environment.
Examples
unmarshal_function("amadeus::process_aqs")
#> function(
#> path = NULL,
#> date = c("2018-01-01", "2022-12-31"),
#> mode = c("date-location", "available-data", "location"),
#> data_field = "Arithmetic.Mean",
#> return_format = c("terra", "sf", "data.table"),
#> extent = NULL,
#> ...
#> ) {
#> mode <- match.arg(mode)
#> return_format <- match.arg(return_format)
#> if (!is.null(date)) {
#> date <- try(as.Date(date))
#> if (inherits(date, "try-error")) {
#> stop("date has invalid format(s). Please check the values.")
#> }
#> if (length(date) != 2) {
#> if (length(date) == 1) {
#> date <- c(date, date)
#> } else {
#> stop("date should be a character vector of length 1 or 2.")
#> }
#> }
#> } else {
#> stop("date should be defined.")
#> }
#> if (length(path) == 1 && dir.exists(path)) {
#> path <- list.files(
#> path = path,
#> pattern = "*.csv$",
#> full.names = TRUE
#> )
#> }
#>
#> if (length(path) == 0) {
#> stop("path does not contain csv files.")
#> }
#> pathfiles <- lapply(path, read.csv)
#>
#> sites <- data.table::rbindlist(pathfiles, fill = TRUE)
#>
#> ## get unique sites
#> sites$site_id <-
#> sprintf(
#> "%02d%03d%04d%05d",
#> as.integer(sites$State.Code),
#> as.integer(sites$County.Code),
#> as.integer(sites$Site.Num),
#> as.integer(sites$Parameter.Code)
#> )
#>
#> site_id <- NULL
#> Datum <- NULL
#> POC <- NULL
#> Date.Local <- NULL
#> Sample.Duration <- NULL
#>
#> date_start <- as.Date(date[1])
#> date_end <- as.Date(date[2])
#> date_sequence <- seq(date_start, date_end, "day")
#> date_sequence <- as.character(date_sequence)
#>
#> # select relevant fields only
#> sites <- sites |>
#> dplyr::as_tibble() |>
#> dplyr::filter(as.character(Date.Local) %in% date_sequence) |>
#> dplyr::filter(startsWith(Sample.Duration, "24")) |>
#> dplyr::group_by(site_id) |>
#> dplyr::filter(POC == min(POC)) |>
#> dplyr::mutate(time = Date.Local) |>
#> dplyr::ungroup()
#> col_sel <- c("site_id", "Longitude", "Latitude", "Datum")
#> if (mode != "available-data") {
#> sites_v <- unique(sites[, col_sel])
#> } else {
#> col_sel <- append(col_sel, "Event.Type")
#> col_sel <- append(col_sel, "time")
#> col_sel <- append(col_sel, data_field)
#> sites_v <- sites |>
#> dplyr::select(dplyr::all_of(col_sel)) |>
#> dplyr::distinct()
#> # excluding site-time with multiple event types
#> # sites_vdup will be "subtracted" from the original sites_v
#> sites_vdup <- sites_v |>
#> dplyr::group_by(site_id, time) |>
#> dplyr::filter(dplyr::n() > 1) |>
#> dplyr::filter(!!dplyr::sym("Event.Type") == "Excluded") |>
#> dplyr::ungroup()
#> sites_v <-
#> dplyr::anti_join(
#> sites_v,
#> sites_vdup,
#> by = c("site_id", "time", "Event.Type")
#> )
#> }
#> names(sites_v)[2:3] <- c("lon", "lat")
#> sites_v <- data.table::as.data.table(sites_v)
#>
#> # subset mainland
#> sites_v <- sites_v[!grepl("^(02|15|72|78|6|80)", site_id), ]
#>
#> # NAD83 to WGS84
#> sites_v_nad <-
#> sites_v[sites_v$Datum == "NAD83", ]
#> sites_v_nad <-
#> terra::vect(
#> sites_v_nad,
#> keepgeom = TRUE,
#> crs = "EPSG:4269"
#> )
#> sites_v_nad <- terra::project(sites_v_nad, "EPSG:4326")
#> # postprocessing: combine WGS84 and new WGS84 records
#> sites_v_nad <- as.data.frame(sites_v_nad)
#> sites_v_wgs <- sites_v[sites_v$Datum == "WGS84"]
#> final_sites <- data.table::rbindlist(
#> list(sites_v_wgs, sites_v_nad),
#> fill = TRUE
#> )
#> final_sites <-
#> final_sites[,
#> grep("Datum", names(final_sites), invert = TRUE),
#> with = FALSE
#> ]
#>
#> if (mode == "date-location") {
#> final_sites <-
#> split(date_sequence, date_sequence) |>
#> lapply(function(x) {
#> fs_time <- final_sites
#> fs_time$time <- x
#> return(fs_time)
#> })
#> final_sites <- data.table::rbindlist(final_sites, fill = TRUE)
#> }
#> if (mode == "available-data") {
#> final_sites <- unique(final_sites)
#> }
#>
#> final_sites <-
#> switch(
#> return_format,
#> terra = terra::vect(
#> final_sites,
#> keepgeom = TRUE,
#> crs = "EPSG:4326"
#> ),
#> sf = sf::st_as_sf(
#> final_sites,
#> remove = FALSE,
#> dim = "XY",
#> coords = c("lon", "lat"),
#> crs = "EPSG:4326"
#> ),
#> data.table = final_sites
#> )
#> if (!is.null(extent)) {
#> if (return_format == "data.table") {
#> warning(
#> "Extent is not applicable for data.table. Returning data.table...\n"
#> )
#> return(final_sites)
#> }
#> final_sites <- apply_extent(final_sites, extent)
#> }
#>
#> return(final_sites)
#> }
#> <bytecode: 0x5600f2143e98>
#> <environment: namespace:amadeus>