From 6f2466815c8247c6ee310aa376d0143c085656c1 Mon Sep 17 00:00:00 2001 From: LTLA Date: Thu, 22 Aug 2024 14:02:53 -0700 Subject: [PATCH] Implement download function that preseves the remote's last-modified. This ensures that the timestamps are accurate so that the cache can be accurately updated. For consistency, we replace all other instances of download.file; our implementation also has friendlier errors. --- NAMESPACE | 1 - R/retrieveDirectory.R | 33 +++++++-------------------------- R/startSewerRat.R | 5 +---- R/utils.R | 32 ++++++++++++++++++++++++++++++++ 4 files changed, 40 insertions(+), 31 deletions(-) diff --git a/NAMESPACE b/NAMESPACE index a76f829..d866e29 100644 --- a/NAMESPACE +++ b/NAMESPACE @@ -12,5 +12,4 @@ export(stopSewerRat) import(httr2) import(methods) importFrom(utils,URLencode) -importFrom(utils,download.file) importFrom(utils,head) diff --git a/R/retrieveDirectory.R b/R/retrieveDirectory.R index 416d512..7714ae3 100644 --- a/R/retrieveDirectory.R +++ b/R/retrieveDirectory.R @@ -82,33 +82,12 @@ retrieveDirectory <- function(path, url, cache=NULL, forceRemote=FALSE, overwrit final } +#' @importFrom utils URLencode full_file_url <- function(url, path) { paste0(url, "/retrieve/file?path=", URLencode(path, reserved=TRUE)) } -get_remote_last_modified <- function(url, path) { - req <- request(full_file_url(url, path)) - req <- req_method(req, "HEAD") - req <- handle_error(req) - res <- req_perform(req) - remote_mod <- resp_header(res, "last-modified") - - if (is.null(remote_mod)) { - warning("failed to find 'last-modified' header from the SewerRat API") - return(NULL) - } - - remote_mod <- as.POSIXct(remote_mod, format="%a, %d %b %Y %H:%M:%S", tz="GMT") - if (is.na(remote_mod)) { - warning("invalid 'last-modified' header from the SewerRat API") - return(NULL) - } - - return(remote_mod) -} - #' @import httr2 -#' @importFrom utils download.file acquire_file_raw <- function(cache, path, url, overwrite, updateDelay) { target <- file.path(cache, "LOCAL", path) @@ -117,7 +96,11 @@ acquire_file_raw <- function(cache, path, url, overwrite, updateDelay) { } else if (!overwrite) { last_mod <- file.info(target)$mtime if (last_mod + updateDelay < Sys.time()) { # only check older files for updates, to avoid excessive queries. - remote_mod <- get_remote_last_modified(url, path) + req <- request(full_file_url(url, path)) + req <- req_method(req, "HEAD") + req <- handle_error(req) + res <- req_perform(req) + remote_mod <- parse_remote_last_modified(res) if (!is.null(remote_mod) && remote_mod > last_mod) { overwrite <- TRUE } @@ -130,9 +113,7 @@ acquire_file_raw <- function(cache, path, url, overwrite, updateDelay) { tempf <- tempfile(tmpdir=tempdir) on.exit(unlink(tempf), add=TRUE, after=FALSE) - if (download.file(full_file_url(url, path), tempf)) { - stop("failed to download '", path, "'") - } + download_file(full_file_url(url, path), tempf) dir.create(dirname(target), recursive=TRUE, showWarnings=FALSE) file.rename(tempf, target) # this should be more or less atomic, so no need for locks. } diff --git a/R/startSewerRat.R b/R/startSewerRat.R index 7ae6c74..210a37b 100644 --- a/R/startSewerRat.R +++ b/R/startSewerRat.R @@ -25,7 +25,6 @@ #' startSewerRat() # initialize a new instance. #' #' @export -#' @importFrom utils download.file startSewerRat <- function(db=tempfile(fileext=".sqlite3"), port=NULL, wait = 1, version = "1.0.6", overwrite = FALSE) { if (!is.null(running$active)) { return(list(new=FALSE, port=running$port, url=assemble_url(running$port))) @@ -59,9 +58,7 @@ startSewerRat <- function(db=tempfile(fileext=".sqlite3"), port=NULL, wait = 1, if (!file.exists(exe) || overwrite) { url <- paste0("https://github.com/ArtifactDB/SewerRat/releases/download/", version, "/", desired) tmp <- tempfile() - if (download.file(url, tmp)) { - stop("failed to download the SewerRat binary") - } + download_file(url, tmp) Sys.chmod(tmp, "0755") # Using a write-and-rename paradigm to provide some atomicity. Note diff --git a/R/utils.R b/R/utils.R index 4d3af68..3145ff3 100644 --- a/R/utils.R +++ b/R/utils.R @@ -49,3 +49,35 @@ clean_path <- function(path) { keep <- c("", keep) # add back the root. paste(keep, collapse="/") } + +#' @import httr2 +parse_remote_last_modified <- function(res) { + remote_mod <- resp_header(res, "last-modified") + + if (is.null(remote_mod)) { + warning("failed to find 'last-modified' header from the SewerRat API") + return(NULL) + } + + remote_mod <- as.POSIXct(remote_mod, format="%a, %d %b %Y %H:%M:%S", tz="GMT") + if (is.na(remote_mod)) { + warning("invalid 'last-modified' header from the SewerRat API") + return(NULL) + } + + return(remote_mod) +} + +#' @import httr2 +download_file <- function(url, path) { + req <- request(url) + req <- handle_error(req) + res <- req_perform(req, path=path) + + # The key part here is to set the modification time correctly, + # so that any updating mechanisms work correctly. + mod <- parse_remote_last_modified(res) + if (!is.null(mod)) { + Sys.setFileTime(path, mod) + } +}