Introduction
This script reads in hourly meteorological drivers and eddy covariance data from towers and generates averages by month and year that can be used to assess ELM and CLM. Currently, the script takes only “1-D” variables (i.e., no soil data), but this will be revisited in the future. Except for time information, all other variables are optional, but typically include the following:
- Time. Either multiple vectors defining the time, or one column with full time information, which should contain the following variables (any order).
- year
- month
- day
- hour
- minute
- Atmospheric pressure
- Air temperature
- Humidity. One or multiple of them may be provided; output will always the humidity variables provided, and will attempt to include specific humidity (which may require pressure and/or temperature as well).
- Specific humidity
- Vapour pressure
- Mixing ratio
- Relative humidity
- Wind speed
- Incident solar irradiance
- Precipitation
- Outgoing shortwave (solar) irradiance
- Outgoing longwave (thermal) irradiance
- Net irradiance (solar + thermal)
- Sensible heat flux (eddy covariance)
- Heat flux into ground
- Water vapour flux (eddy covariance)
- CO[2] flux (eddy covariance)
- Change in CO[2] storage in the canopy air space
- Friction velocity
- Gross Primary Productivity (GPP)
- Ecosystem respiration
- Net Ecosystem Productivity (NEP, positive means net carbon accumulation in ecosystem)
The example below shows how the data should look like (first 10 lines):
year,month,day,hour,min,atm.prss,atm.tmp,atm.shv,atm.vels,atm.vdir,rshort.in,rshort.out,rlong.in,rlong.out,rnet,rain,sens,fh2o,fco2,storco2,ustar,gep,reco,nee,atm.rhv
2004,1,1,0,0,999.5,24.41,18.42,2.9,58.8,0,0,443.76,444.82,-1.68,0,-30,0.01,9.92,NaN,0.32,0,9.26,9.26,97.87
2004,1,1,1,0,999.5,24.63,18.5,2.55,65.1,0,0,444.67,445.85,-1.77,0,NaN,0.02,12.95,NaN,0.32,0,9.26,9.26,96.97
2004,1,1,2,0,999.5,24.77,18.64,2.15,59.5,0,0,445.67,446.48,NaN,0,NaN,NaN,-3.32,8.57,0.18,0,5.25,5.25,96.9
2004,1,1,3,0,998.5,24.94,18.78,2.45,55.4,0,0,446.41,447.56,-2.3,0,NaN,0.04,9.63,6.59,0.28,0,16.22,16.22,96.56
2004,1,1,4,0,998.5,24.21,17.88,4.4,92.8,0,0,438.98,443.38,-4.4,4.2,NaN,NaN,NaN,-24.92,0.56,0,9.56,9.56,95.99
2004,1,1,5,0,998.5,23.73,17.76,3.5,101.8,0,0,437.42,440.86,-3.63,0.2,NaN,NaN,NaN,-1.06,0.47,0,9.56,9.56,98.16
2004,1,1,6,0,998.5,24.03,18.08,4.05,76.9,0,0,440.88,442.3,-1.44,2.6,NaN,NaN,NaN,-0.3,0.65,0,9.56,9.56,98.16
2004,1,1,7,0,999.5,23.44,17.5,4.65,64.9,1.09,NaN,436.54,438.12,-0.53,2.8,NaN,NaN,NaN,-2.17,0.76,0,9.56,9.56,98.5
2004,1,1,8,0,1000.5,23.54,17.63,1.95,96.9,33.97,3.72,433.63,438.75,25.13,0,-16.46,0.04,-1.42,3.71,0.22,7.26,9.56,2.3,98.73
2004,1,1,9,0,1001.5,23.93,18.06,0.75,211.2,93.71,9.58,432.94,440.62,76.44,0,NaN,NaN,-1.71,-1.31,0.15,9.27,9.56,0.29,98.9
2004,1,1,10,0,1002,24.67,18.73,0.3,206.8,239.25,21.8,419.64,442.04,195.05,0,16.87,0.11,-5.15,-5.04,0.13,18.62,9.56,-9.06,98.28
The data does not need to be gap-filled. We will use the average diurnal cycle with available data to build summaries at the monthly scale.
Reset session
Use this chunk to fully reset R.
# Unload all packages except for the R default ones
plist = names(sessionInfo()$otherPkgs)
if (length(plist) > 0){
dummy = sapply(X=paste0("package:",plist),FUN=detach,character.only=TRUE,unload=TRUE)
}#end if (length(plist) > 0)
# Remove all variables
rm(list=ls())
# Reset warnings
options(warn=0)
# Close all plots
invisible(graphics.off())
# Clean up
invisible(gc())
Main settings
Required packages
Load some packages. By default, we skip the warning messages, but it may be a good idea to visualise them if it is the first time running.
isfine = c( data.table = require(data.table ,quietly=TRUE,warn.conflicts=FALSE)
, lubridate = require(lubridate ,quietly=TRUE,warn.conflicts=FALSE)
, ncdf4 = require(ncdf4 ,quietly=TRUE,warn.conflicts=FALSE)
, patchwork = require(patchwork ,quietly=TRUE,warn.conflicts=FALSE)
, purrr = require(purrr ,quietly=TRUE,warn.conflicts=FALSE)
, RColorBrewer = require(RColorBrewer,quietly=TRUE,warn.conflicts=FALSE)
, reshape2 = require(reshape2 ,quietly=TRUE,warn.conflicts=FALSE)
, tidyverse = require(tidyverse ,quietly=TRUE,warn.conflicts=FALSE)
, viridis = require(viridis ,quietly=TRUE,warn.conflicts=FALSE)
)#end c
if (! all(isfine)){
cat0(" List of required packages, and the success status loading them:\n")
print(isfine)
stop(" Some packages are missing and must be installed.")
}
Paths
Set paths and files for input and output.
- main_path. The main path for processing data. The output path will be generated here.
- input_file. The csv file containing the gap-filled tower/AWS data (full path).
- output_path. The main output path for the site.
main_path = file.path(path.expand("~"),"Documents","LocalData","TowerData","Tanguro","make_single_site")
input_file = file.path(main_path,"input_csv","tb0_eddy-data_2008-2018.csv")
output_path = file.path(path.expand("~"),"Data","FATES_DataSets")
Tower information
Provide some basic information about the tower:
- xid. Unique Tower ID.
- site_desc. Site description (to be appended to the netCDF headers)
- dat_version. Version of this data set.
- site_lon. Site longitude (degrees east).
- site_lat. Site latitude (degrees north).
- site_alt. Site altitude (metres above sea level).
- site_refhgt. Height reference for observations (e.g., the tower height above ground).
- dxy. Intended grid size (we will generate a 1x1 pseudo-grid for FATES).
- undef_in. A string to denote missing values in the input (csv) file. In case multiple flags exist, use the vector concatenation format (e.g.,
c("NA","NaN","-999.9"))
- undef_out. A number to denote missing values in the output (NetCDF) file. This is needed by NetCDF.
xid = "tanguroMT-BR"
site_desc = "Tanguro, MT, Brazil"
dat_version = "1.0"
site_lon = -52.41
site_lat = -13.08
site_alt = 349.0
site_refhgt = 36.0
dxy = 1.0
undef_in = "NaN"
undef_out = -9999.99
Useful variables for time conversion: * UTC offset, if data are provided in local time. For example, if the location is in the Western Hemisphere and the time zone is UTC-3, then utc_off should be -3 (Important. If the input data are already in UTC, then set utc_off to zero). * Year offset, if year is provided with two digits. For example, if the data are for 2003-2016 but the years are in 03-16 format, set year_off to 2000. If the year is already provided in four digits, set it to zero.
utc_off = -4
year_off = 0
Next, we identify how time information is provided, by setting variable date_colname.
* If date_colname=NA_character_, then variable dateinfo must contain the names of the columns corresponding to year (year), month (month), day (day), hour (hour), and minute (min), respectively. If minute is absent, set it to NA_character_. * Otherwise, date_colname indicates the name of the column in the input data that has the time information (case sensitive). In this case, variable dateinfo should indicate the position of each piece of the time information, following the examples below. If minute is absent, set it to NA_integer_
| 06/29/2021 14:30:22 |
3 |
1 |
2 |
4 |
5 |
| 06/29/2021 14:30 |
3 |
1 |
2 |
4 |
5 |
| 29/06/2021 14:30 |
3 |
2 |
1 |
4 |
5 |
| 2021-06-29 14:30 |
1 |
2 |
3 |
4 |
5 |
| 06/29/2021 14 |
3 |
1 |
2 |
4 |
NA |
# Is date provided as multi-column (TRUE) or single string (FALSE)?
date_colname = NA_character_
# Time information.
dateinfo = c( year = "year", month = "month", day = "day", hour = "hour", min = "min")
# Examples of time information:
# - If date_colname is NA_character_
# dateinfo = c( year = "Year", month = "Month", day = "Date", hour = "Hour", min = "Minute")
#
# - If date_colname is a column in the input data.
# dateinfo = c( year = 3, month = 1, day = 2, hour = 4, min = 5)
How to interpret the time stamp. Currently there are three options, as described below. To help illustrating the cases, we give examples of the beginning and end of average period of an example time stamp (2021-06-11 08:00) for an hourly average data where the high-frequency data were collected at 1-minute resolution.
| 1 |
End of averaging period |
2021-06-11 07:01 |
2021-06-11 07:00 |
| 2 |
Beginning of averaging period |
2021-06-11 08:00 |
2021-06-11 08:59 |
| 3 |
Middle of averaging period |
2021-06-11 07:31 |
2021-06-11 08:30 |
imetavg = 1
Input variables
Here we will provide information about the eddy covariance variables. Our goal is to have the variables names and units as defined in the table below, to be consistent with ELM/CLM units:
| PBOT |
Atmospheric pressure |
Pa |
| TBOT |
Air temperature |
K |
| QBOT1 |
Specific humidity |
kg kg-1 |
| RBOT1 |
Vapour mixing ratio |
kg kg-1 |
| EBOT1 |
Vapour pressure |
Pa |
| RH1 |
Relative humidity |
% |
| WIND |
Wind speed |
m s-1 |
| FSDS |
Incident solar irradiance |
W m-2 |
| FLDS |
Incident longwave irradiance |
W m-2 |
| RAIN |
Precipitation rate |
kg m-2 s-1 (mm s-1) |
| FSR |
Outgoing short wave (solar) irradiance |
W m-2 |
| FIRE |
Outgoing long wave (thermal) irradiance |
W m-2 |
| Rnet |
Net irradiance (solar + thermal) |
W m-2 |
| FSH |
Sensible heat flux (eddy covariance) |
W m-2 |
| FGR |
Heat flux into ground |
W m-2 |
| QEVTR2 |
Water vapour flux (eddy covariance) |
kg m-2 s-1 (mm s-1) |
| USTAR |
Friction velocity |
W m-2 |
| GPP |
Gross Primary Productivity (GPP) |
gC m-2 s-1 |
| ER3 |
Ecosystem respiration |
gC m-2 s-1 |
| NEP |
Net Ecosystem Productivity (NEP, + is sink) |
gC m-2 s-1 |
Notes
- Only one of the humidity variables needs be provided, but we recommend QBOT and RH as these variables are also output variables in ELM and CLM. The output file will always have the one humidity variable provided, and will attempt to compute specific humidity (QBOT), as long as other potentially dependent variables (pressure and/or temperature) are also provided.
- Total water flux is not available as a variable in ELM or CLM. Instead, this can be calculated from the total evaporation from ground (QSOIL), evaporation from canopy surface (QVEGE), and transpiration (QVEGT). Make sure that your ELM and CLM output has these variables.
- Ecosystem respiration is only available under some configurations in ELM or CLM. More often, it will be calculated from the total autotrophic respiration (AR) and total heterotrophic respiration (HR). Make sure that your ELM and CLM output has these variables.
First, we define some unit conversion constants for the chunk below
Pa_2_hPa = 0.01 ; hPa_2_Pa = 1. / Pa_2_hPa
K_2_degC = -273.15 ; degC_2_K = - K_2_degC
kg_2_g = 1000. ; g_2_kg = 1. / kg_2_g
sec_2_hr = 1./3600. ; hr_2_sec = 1. / sec_2_hr
sec_2_day = 1./86400. ; day_2_sec = 1. / sec_2_day
frac_2_pc = 100. ; pc_2_frac = 1. / frac_2_pc
umol_2_gC = 1.20107e-5; gC_2_umol = 1. / umol_2_gC
Then use the varinfo list to standardise the output. This is a tibble object (defined with tribble), containing the following elements:
- vfates. Variable name from the table above (don’t change this value).
- vlname. Long name for variable (don’t change this value).
- vunits. Units for the variable (same as the table above, don’t change this value).
- vinput. Variable name in the input file.
- add0. Value to be added to the original input
- mult. Value to be multiplied to the original input
For add0 and mult, except for trivial factors (add0=0. or mult=1), refrain from typing “magic numbers” here. Instead, use the unit conversion parameters described in the chunk above. In case the sought unit conversion is not available in the previous chunk, feel free to add more factors.
varinfo = tribble( ~vfates , ~vlname , ~vunits , ~vinput , ~add0 , ~mult
, "PBOT" , "surface pressure at the tower" , "Pa" , "atm.prss" , 0. , hPa_2_Pa
, "TBOT" , "temperature at the tower" , "K" , "atm.tmp" , degC_2_K, 1.
, "QBOT" , "specific humidity at the tower" , "kg/kg" , "atm.shv" , 0. , g_2_kg
, "WIND" , "wind at the tower" , "m/s" , "atm.vels" , 0. , 1.
, "FSDS" , "incident solar radiation at the tower" , "W/m2" , "rshort.in" , 0. , 1.
, "FLDS" , "incident thermal radiation at the tower", "W/m2" , "rlong.in" , 0. , 1.
, "PRECTmms", "precipitation rate at the tower" , "mm/s" , "rain" , 0. , sec_2_hr
, "FSR" , "outgoing shortwave (solar) irradiance" , "W/m2" , "rshort.out", 0. , 1.
, "FIRE" , "outgoing long wave (thermal) irradiance", "W/m2" , "rlong.out" , 0. , 1.
, "Rnet" , "Net irradiance (solar + thermal)" , "W/m2" , "rnet" , 0. , 1.
, "FSH" , "Sensible heat flux" , "W/m2" , "fsens" , 0. , 1.
, "QEVTR" , "Total water vapour flux" , "mm/s" , "fh2o" , 0. , sec_2_hr
, "USTAR" , "Friction velocity" , "m/s" , "ustar" , 0. , 1.
, "GPP" , "Gross primary productivity" , "gC/m2/s", "gep" , 0. , umol_2_gC
, "ER" , "Ecosystem respiration" , "gC/m2/s", "reco" , 0. , umol_2_gC
, "NEP" , "Net Ecosystem productivity (+ is sink)" , "gC/m2/s", "nee" , 0. , -umol_2_gC
)#end tribble
This concludes the initial settings. From this point on, you may not need to change anything, unless you are debugging or adding new features.
Data processing
Met driver loading
Read in the csv file.
eddy_orig = read.csv( file = input_file
, header = TRUE
, comment.char = ""
, na.strings = undef_in
, stringsAsFactors = FALSE
)#end read.csv
eddy_orig = tibble(eddy_orig)
Standardise time. It must be a vector of type lubritime.
if (is.na(date_colname)){
# Create minute column in case it is missing.
if (is.na(dateinfo[["min"]])){
dateinfo[["min"]] = "Minute"
eddy_orig$Minute = rep(x=0,times=nrow(eddy_orig))
}#end if (is.na(dateinfo[["min"]]))
# Find date variables and rename them.
idx = match(dateinfo,names(eddy_orig))
names(eddy_orig)[idx] = names(dateinfo)
}else{
# Split the time label
decomp_time = lapply( X = tstrsplit( eddy_orig[[date_colname]],"[^0-9]")
, FUN = as.numeric
)#end lapply
# In case minute is missing, append a vector with zeroes.
if (is.na(dateinfo[["min"]])){
decomp_time = c(decomp_time,list(0*decomp_time[[1]]))
dateinfo[["min"]] = length(decomp_time)
}#end if (is.na(dateinfo[["min"]]))
# Select list elements that are useful for time.
decomp_time = decomp_time[dateinfo]
names(decomp_time) = names(dateinfo)
decomp_time = as.data.table(decomp_time)
# Merge time information to the main tibble.
eddy_orig = as_tibble(cbind(eddy_orig,decomp_time))
}#end if (is.na(date_colname) && is.na(dateinfo[["min"]]))
# Create lubridate-friendly time stamp
eddy_orig = eddy_orig %>%
mutate( year = year + year_off
, tstamp = make_datetime( year = year
, month = month
, day = day
, hour = hour
, min = min
, tz = "UTC"
)#end make_datetime
)#end mutate
Time standardisation
Find the time difference, and assess the first and last year with full data.
#---- Time interval between observations.
dtstamp = mean(diff(eddy_orig$tstamp))
#---- Number of observations per day.
nperday = time_length(make_difftime(day=1),unit="hour") / time_length(x=dtstamp,unit="hour")
#---- Find which years have complete data
datsumm = group_by (.data=eddy_orig,year,month) %>%
summarise( count = n()) %>%
ungroup () %>%
mutate (expect = days_in_month(make_datetime(year=year,month=month)) * nperday) %>%
group_by (year) %>%
summarise(fine = all(count == expect)) %>%
ungroup ()
## `summarise()` has grouped output by 'year'. You can override using the `.groups` argument.
#---- Find first and last complete years
allfine = all(datsumm$fine)
yeara = min(datsumm$year[datsumm$fine])
yearz = max(datsumm$year[datsumm$fine])
#---- Find and last expected time
tstampa = make_datetime( year=yeara ,month=1,day=1)
tstampz = make_datetime( year=yearz+1,month=1,day=1)
Correct data to be in UTC whilst accounting for the time stamp. For FATES, the time stamp should always correspond to the middle of the averaging window.
if (imetavg == 1){
dt.utc = make_difftime(hour=-utc_off) - 0.5 * dtstamp
}else if (imetavg == 2){
dt.utc = make_difftime(hour=-utc_off) + 0.5 * dtstamp
}else if (imetavg == 3){
dt.utc = make_difftime(hour=-utc_off)
}#end if (imetavg)
eddy_data = eddy_orig %>%
mutate(tstamp = tstamp + dt.utc) %>%
select(!c(year,month,day,hour,min))
With the time zone shift, we may have a few data points off at the beginning or at the end.
As a simple solution, we just shift these hours to the beginning or the end of the simulation, just so every year/month has all the data.
eddy_data = eddy_data %>%
mutate( tstamp = case_when( tstamp < tstampa ~ tstampz + difftime(tstamp,tstampa)
, tstamp > tstampz ~ tstampa + difftime(tstamp,tstampz)
, TRUE ~ tstamp
)#end when
) %>% arrange(tstamp)
Unit standardisation for output
Simplify output so variables are in the correct units, and only variables needed for the output remain in the output.
eddy_fates = eddy_data %>%
select(c(tstamp,varinfo$vinput)) %>%
rename_at(vars(c("tstamp",varinfo$vinput)), ~ c("time",varinfo$vfates))
Fix units for all variables:
# Create a simple addition tibble
add0 = varinfo %>% mutate(id=1) %>% select(c(id,vfates,add0)) %>%
spread(vfates,add0) %>% select(-id) %>% relocate(varinfo$vfates)
# Create a simple multiplication tibble
mult = varinfo %>% mutate(id=1) %>% select(c(id,vfates,mult)) %>%
spread(vfates,mult) %>% select (! id) %>% relocate(varinfo$vfates)
# Fix units. We temporarily remove time from the tibble, so we can add and multiply.
tstamp = eddy_fates$time
eddy_fates = eddy_fates %>% select(! time)
eddy_fates = as_tibble(map2(eddy_fates,add0,function(x,y) if (is.numeric(x)){x+y}else{x}))
eddy_fates = as_tibble(map2(eddy_fates,mult,function(x,y) if (is.numeric(x)){x*y}else{x}))
eddy_fates = eddy_fates %>% mutate( time = tstamp) %>% relocate(c("time",varinfo$vfates))
Derive humidity
This step is only needed if specific humidity was not provided. In this case, we derive specific humidity (\(q_{v}\)) [ \(\mathrm{kg_{v}\,kg_{a}^{-1}}\)] using one the following relationships (in this order of preference), using the fact that specific humidity is the ratio of mass (or partial density) of water vapour \(\rho_{v}\) [\(\mathrm{kg_{v}\,m^{-3}}\)] and total mass (or density) of air (\(\rho_{a}\)) [\(\mathrm{kg_{a}\,m^{-3}}\)].
Mixing ratio \(r_v\) [ \(\mathrm{kg_{v}\,kg_{d}^{-1}}\)] is defined as the ratio between partial density of water vapour \(\rho_{v}\) and partial density of dry air (\(\rho_{d}\)) [\(\mathrm{kg_{d}\,m^{-3}}\)]. Assuming that \(\rho_{a} = \rho_{v} + \rho_{d}\), we can relate mixing ratio and specific humidity as: \[\begin{align} & q_{v} = \frac{r_{v}}{r_{v}+1} \end{align}.\]
To convert from water vapour pressure \(e_{v}\) [\(\mathrm{Pa}\)] to \(q_v\), we use the ideal gas equation of a mixture of dry air and water vapour , and account for the difference in molar masses of water vapour (\(\mathcal{M}_v = 1.802 \times 10^{-2}\,\mathrm{kg\,mol^{-1}}\)) and dry air (\(\mathcal{M}_d = 2.897 \times 10^{-2}\,\mathrm{kg\,mol^{-1}}\)): \[\begin{align}
& q_{v} = \frac{\varepsilon{}\,e_{v}}{p - \left(1 - \varepsilon{}\right)\,e_{v}}
\end{align},\] where \(p\) [\(\mathrm{Pa}\)] is the total air pressure and \(\varepsilon = \mathcal{M}_{v} / \mathcal{M}_{d}\).
Relative humidity (\(\mathrm{RH}\)) [\(\%\)] is defined as \(\mathrm{RH} = 100 \, e_{v}/e_{v}^{\star}\), where \(e_{v}^{\star}\) is the vapour pressure at saturation, which depends on temperature \(T\) [\(\mathrm{K}\)]. To derive specific humidity, we use (Bolton 1980) empirical equation for \(e_{v}^{\star}\) to derive \(e_{v}\): \[\begin{align}
& e_{v} = 0.01 \, \mathrm{RH} \,e_{0}\,\exp{\left[ \frac{17.67\,\left( T - T_{0}\right)}{T - 29.65} \right]}
\end{align},\] where \(e_{0} = 611.66\,\mathrm{Pa}\) is the vapour pressure of saturation at \(T_{0}=273.15\,\mathrm{K}\). We then convert \(e_{v}\) to \(q_{v}\) using the same equation described above.
#---~---
# Define some thermodynamic constants.
#---~---
# Molar mass of water vapour [kg/mol]
mm_h2o = 0.01801505
# Molar mass of dry air [kg/mol]
mm_dry = 0.02897
# Molar mass ratio
eps_mol = mm_h2o / mm_dry
# Saturation vapour pressure at 273.15K [Pa]
esat_0C = 611.65685464
if ("QBOT" %in% names(eddy_fates)){
# Specific humidity already in the meteorological driver, find relative humidity.
cat(" - Specific humidity included in the input data. No action needed.\n")
add_qbot = FALSE
}else if(all(c("RH","PBOT","TBOT") %in% names(eddy_fates))){
# Find specific humidity from vapour pressure
eddy_fates = eddy_fates %>%
mutate( ESAT = esat_0C * exp( 17.67 * (TBOT - degC_2_K) / (TBOT - 29.65))
, EBOT = pc_2_frac * RH * ESAT
, QBOT = eps_mol * EBOT / (PBOT - (1. - eps_mol) * EBOT)
) %>% select(! c(ESAT,EBOT))
add_qbot = TRUE
}else if (all(c("EBOT","PBOT") %in% names(eddy_fates))){
# Find specific humidity from vapour pressure
eddy_fates = eddy_fates %>%
mutate( QBOT = eps_mol * EBOT / (PBOT - (1. - eps_mol) * EBOT) )
add_qbot = TRUE
}else if ("RBOT" %in% names(eddy_fates)){
# Find specific humidity from mixing ratio
eddy_fates = eddy_fates %>%
mutate( QBOT = RBOT / (1. + RBOT) )
add_qbot = TRUE
}else{
# It is not possible to get QBOT from the provided variables.
add_qbot = FALSE
}#end if ("QBOT" %in% names(eddy_fates))
## - Specific humidity included in the input data. No action needed.
# Update or insert specific humidity to the list of output variables.
if (add_qbot){
varinfo = varinfo %>%
rows_upsert( tibble( vfates = "QBOT"
, vlname = "specific humidity at the tower"
, vunits = "kg/kg"
)#end tibble
)#end rows_upsert
}#end if (add_qbot)
Data output
Create paths for output, using FATES convention.
# Labels for describing the data set.
ymd_now = today(tzone = "UTC")
ymd_lab = sprintf("%4.4i%2.2i%2.2i",year(ymd_now),month(ymd_now),day(ymd_now))
# Tag specific for this site, grid, and version (used for file and path names).
site_tag = paste0("1x1pt-",xid,"_v",dat_version,"_c",ymd_lab)
# Path to where we will write the drivers.
eddy_base = paste0(site_tag,"_eddy-summ.nc")
eddy_path = file.path(output_path,site_tag)
eddy_file = file.path(eddy_path,eddy_base)
# Create directory
dummy = dir.create(path=eddy_path ,showWarnings=FALSE,recursive=TRUE)
Group data by month and year. Because we may have missing data, we take first the mean diurnal cycle for each month and year, then find the monthly averages. This reduces the risk of biases in the monthly average estimate due to frequent missing data in one time of the day, for example.
# Summarise data by hour month and year, then summary by month and year.
eddy_emean = eddy_fates %>%
mutate( year = year(time), month=month(time), hour=hour(time)) %>%
select(! c(time)) %>%
group_by( year,month,hour) %>%
summarise_all( ~mean(.,na.rm=TRUE)) %>%
ungroup() %>%
select(! c(hour)) %>%
group_by( year,month) %>%
summarise_all( ~mean(.,na.rm=FALSE)) %>%
ungroup() %>%
mutate( time = make_datetime(year=year,month=month,tz="UTC")) %>%
select(! c(year,month)) %>%
select(c("time",varinfo$vfates))
# Set the number of times to be written to the output
nemean = nrow(eddy_emean)
Generate edge information for the single data point.
# Find precision for the coordinates (typically 1% of the grid size).
outprec = -floor(log10(dxy*0.01))
# Standardise coordinates (and make sure latitude cannot exceed the poles).
outlon = round(site_lon,outprec) %% 360.
outlat = max(-90,min(90-0.5*dxy,round(site_lat,outprec)))
# Find edges.
edge_w = (outlon - 0.5 * dxy) %% 360.
edge_e = (outlon + 0.5 * dxy) %% 360.
edge_s = outlat - 0.5 * dxy
edge_n = outlat + 0.5 * dxy
Define the template for the global attributes.
# Define the code developer information (indirect way so the email is not visible).
developer_name = c( 111L, 103L, 110L, 111L, 76L, 32L, 115L, 111L, 99L, 114L, 97L, 77L)
developer_email = c( 118L, 111L, 103L, 46L, 108L, 98L, 108L, 64L, 111L, 103L, 110L, 111L
, 108L, 109L)
# Define the template. We will update the title in each time step.
att_template = list( title = "To be replaced when looping through months"
, version = dat_version
, date_created = paste0(as.character(now(tzone="UTC")), "UTC")
, source_code = "make_fates_tower_summary.Rmd"
, code_notes = "Eddy covariance summary for benchmarking ELM-FATES and CLM-FATES"
, code_developer = paste0( intToUtf8(rev(developer_name))
," <"
, intToUtf8(rev(developer_email))
,">"
)#end paste0
, file_author = paste0(author_name," <",author_email,">")
, data_provider = paste0(datprov_name," <",datprov_email,">")
, usage_notes = data_usage_notes
)#end list
Create a single NetCDF file with all the averages by month and year. We do not save the monthly average across all years because simulations may or may not overlap with all the observation months and years.
# Extract time, and turn it into a difference in months since the first time
year_first = year (eddy_emean$time[1])
month_first = month(eddy_emean$time[1])
time_first = eddy_emean$time[1]
# Extract time, and turn it into a difference in months
tsince = as.numeric(difftime(eddy_emean$time,time_first,units="days"))
# In case file exists, it will be re-created.
cat(" + Write averages by month and year to ",eddy_base,".\n",sep="")
if (file.exists(eddy_file)) file.remove(eddy_file)
# Add dimensions: longitude, latitude, and time. We do not automatically create the
# dimension variable for time because R would create it in double precision. Instead,
# we append variable time manually.
xx = ncdim_def( name="lon" ,units="",vals=1L ,create_dimvar=FALSE)
yy = ncdim_def( name="lat" ,units="",vals=1L ,create_dimvar=FALSE)
tt = ncdim_def( name="time" ,units="",vals=seq_along(tsince),create_dimvar=FALSE)
ss = ncdim_def( name="scalar",units="",vals=1L ,create_dimvar=FALSE)
# List of dimensions, useful for setting variables.
nc_xy = list (xx,yy)
nc_xyt = list(xx,yy,tt)
nc_t = list (tt)
nc_s = list(ss)
xy = c(1,1)
xyt = c(1,1,nemean)
# Start list with variables. First we put the coordinates
nc_vlist = list()
nc_vlist$LONGXY = ncvar_def( name = "LONGXY"
, units = "degrees_east"
, dim = nc_xy
, missval = undef_out
, longname = "longitude"
)#end ncvar_def
nc_vlist$LATIXY = ncvar_def( name = "LATIXY"
, units = "degrees_north"
, dim = nc_xy
, missval = undef_out
, longname = "latitude"
)#end ncvar_def
nc_vlist$EDGEW = ncvar_def( name = "EDGEW"
, units = "degrees_east"
, dim = nc_s
, missval = undef_out
, longname = "western edge in atmospheric data"
)#end ncvar_def
nc_vlist$EDGEE = ncvar_def( name = "EDGEE"
, units = "degrees_east"
, dim = nc_s
, missval = undef_out
, longname = "eastern edge in atmospheric data"
)#end ncvar_def
nc_vlist$EDGES = ncvar_def( name = "EDGES"
, units = "degrees_north"
, dim = nc_s
, missval = undef_out
, longname = "southern edge in atmospheric data"
)#end ncvar_def
nc_vlist$EDGEN = ncvar_def( name = "EDGEN"
, units = "degrees_north"
, dim = nc_s
, missval = undef_out
, longname = "northern edge in atmospheric data"
)#end ncvar_def
nc_vlist$time = ncvar_def( name = "time"
, units = paste0( "days since ",as.character(time_first)
, " 00:00:00 UTC"
)#end paste0
, dim = nc_t
, missval = undef_out
, longname = "observation time"
)#end ncvar_def
# Loop through FATES met drivers, add them
for (v in seq_along(varinfo[[1]])){
# Handy shorter names
v_vfates = varinfo$vfates[v]
v_vlname = varinfo$vlname[v]
v_vunits = varinfo$vunits[v]
#Add variable information
nc_vlist[[v_vfates]] = ncvar_def( name = v_vfates
, units = v_vunits
, dim = nc_xyt
, missval = undef_out
, longname = v_vlname
)#end ncvar_def
}#end for (v in seq_along(varinfo[[1]]))
# Create file
nc_conn = nc_create(filename=eddy_file,vars=nc_vlist,verbose=FALSE)
#---~---
# Put coordinates, tower height and attributes to the netcdf
#---~---
# Longitude, append time-invariant tag
dummy = ncvar_put(nc=nc_conn,varid="LONGXY",vals=array(data=outlon ,dim=xy))
dummy = ncatt_put(nc=nc_conn,varid="LONGXY",attname="mode" ,attval="time-invariant")
# Latitude, append time-invariant tag
dummy = ncvar_put(nc=nc_conn,varid="LATIXY",vals=array(data=outlat ,dim=xy))
dummy = ncatt_put(nc=nc_conn,varid="LATIXY",attname="mode" ,attval="time-invariant")
# Western edge, append time-invariant tag
dummy = ncvar_put(nc=nc_conn,varid="EDGEW" ,vals=edge_w)
dummy = ncatt_put(nc=nc_conn,varid="EDGEW" ,attname="mode" ,attval="time-invariant")
# Eastern edge, append time-invariant tag
dummy = ncvar_put(nc=nc_conn,varid="EDGEE" ,vals=edge_e)
dummy = ncatt_put(nc=nc_conn,varid="EDGEE" ,attname="mode" ,attval="time-invariant")
# Southern edge, append time-invariant tag
dummy = ncvar_put(nc=nc_conn,varid="EDGES" ,vals=edge_s)
dummy = ncatt_put(nc=nc_conn,varid="EDGES" ,attname="mode" ,attval="time-invariant")
# Northern edge, append time-invariant tag
dummy = ncvar_put(nc=nc_conn,varid="EDGEN" ,vals=edge_n)
dummy = ncatt_put(nc=nc_conn,varid="EDGEN" ,attname="mode" ,attval="time-invariant")
# Time, append calendar type.
dummy = ncvar_put(nc=nc_conn,varid="time" ,vals=tsince)
dummy = ncatt_put(nc=nc_conn,varid="time" ,attname="calendar",attval="gregorian")
#---~---
# Put variables to the netcdf
for (v in seq_along(varinfo[[1]])){
# Handy shorter names
v_vfates = varinfo$vfates[v]
v_vlname = varinfo$vlname[v]
v_vunits = varinfo$vunits[v]
#Add variable information
dummy = ncvar_put( nc = nc_conn
, varid = v_vfates
, vals = array(data=eddy_emean[[v_vfates]],dim=xyt)
)#end ncvar_put
#Add attribute to highlight this is time-dependent
dummy = ncatt_put( nc = nc_conn
, varid = v_vfates
, attname = "mode"
, attval = "time-dependent")
}#end for (v in seq_along(varinfo[[1]]))
# Add title specific for this month/year.
nc_title = paste0( "Averages by month and year for ",site_desc)
att_global = modifyList( x = att_template, val = list( title = nc_title ))
# Loop through global attributes
for (l in seq_along(att_global)){
# Current attribute information
att_name = names(att_global)[l]
att_value = att_global[[l]]
# Add attribute
dummy = ncatt_put(nc=nc_conn,varid=0,attname=att_name,attval=att_value)
}#end for (l in seq_along(att_global))
# Close the file
dummy = nc_close(nc_conn)