```python
import requests
from lxml.html import parse
from io import StringIO
import xarray as xr
from datetime import datetime
import rioxarray  # used "magically" via xarray's engine="rasterio"
import dask  # required for lazy, chunked reads with open_mfdataset
```

```r
library(rvest)
library(stringr)
library(stars)
```

We extract a list of URLs to the daily NetCDF files:
url = "https://noaadata.apps.nsidc.org/NOAA/G02202_V4/north/daily/1978/"
r = requests.get(url)
tree = parse(StringIO(r.text)).getroot()
files = [a.text for a in tree.iter('a')]
sic_files = [i for i in files if 'conc' in i]
sic_urls = [url + s for s in sic_files]
```

```r
url <- "https://noaadata.apps.nsidc.org/NOAA/G02202_V4/north/daily/1978/"
page <- read_html(url)
files <- page %>% html_nodes("a") %>% html_attr("href")
urls <- paste0(url, files)
sic_urls <- urls |> subset(str_detect(urls, "conc"))
```

We open all of these NetCDF files lazily over the GDAL virtual filesystem, allowing us to efficiently subset just the data we need.
```python
cdr = xr.open_mfdataset(sic_urls,
                        variable="cdr_seaice_conc",
                        engine="rasterio",
                        concat_dim="time",
                        combine="nested")
```

```r
dates <- stringr::str_extract(sic_urls, "\\d{8}") |> lubridate::ymd()
cdr <- read_stars(paste0("/vsicurl/", sic_urls),
                  "cdr_seaice_conc",
                  along = list(time = dates),
                  quiet = TRUE)
```
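
To show what the lazy approach buys us, here is a minimal Python sketch (an illustrative addition, not part of the workflow above): we attach real dates to the concatenated time dimension, mirroring the R `along = list(time = dates)` step, and then select and reduce one month of the stack. The `cdr_seaice_conc` variable name and the `time` dimension are assumed from the calls above, and the November 1978 selection is only an example.

```python
import re
import pandas as pd

# Parse the YYYYMMDD stamp out of each file URL and use it as the time
# coordinate, mirroring the R `along = list(time = dates)` step above.
dates = pd.to_datetime([re.search(r"\d{8}", u).group() for u in sic_urls])
cdr = cdr.assign_coords(time=dates)

# Lazily select one month of daily fields and average over time; only the
# bytes needed for this subset are fetched when .compute() runs.
nov = cdr["cdr_seaice_conc"].sel(time="1978-11")
nov_mean = nov.mean("time").compute()
```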