Introduction

This script reads in hourly meteorological drivers and eddy covariance data from towers and generates averages by month and year that can be used to assess ELM and CLM. Currently, the script takes only “1-D” variables (i.e., no soil data), but this will be revisited in the future. Except for time information, all other variables are optional, but typically include the following:

Time. Either multiple vectors defining the time, or one column with full time information, which should contain the following variables (any order).
- year
- month
- day
- hour
- minute
Atmospheric pressure
Air temperature
Humidity. One or multiple of them may be provided; output will always the humidity variables provided, and will attempt to include specific humidity (which may require pressure and/or temperature as well).
- Specific humidity
- Vapour pressure
- Mixing ratio
- Relative humidity
Wind speed
Incident solar irradiance
Precipitation
Outgoing shortwave (solar) irradiance
Outgoing longwave (thermal) irradiance
Net irradiance (solar + thermal)
Sensible heat flux (eddy covariance)
Heat flux into ground
Water vapour flux (eddy covariance)
CO[2] flux (eddy covariance)
Change in CO[2] storage in the canopy air space
Friction velocity
Gross Primary Productivity (GPP)
Ecosystem respiration
Net Ecosystem Productivity (NEP, positive means net carbon accumulation in ecosystem)

The example below shows how the data should look like (first 10 lines):

year,month,day,hour,min,atm.prss,atm.tmp,atm.shv,atm.vels,atm.vdir,rshort.in,rshort.out,rlong.in,rlong.out,rnet,rain,sens,fh2o,fco2,storco2,ustar,gep,reco,nee,atm.rhv
2004,1,1,0,0,999.5,24.41,18.42,2.9,58.8,0,0,443.76,444.82,-1.68,0,-30,0.01,9.92,NaN,0.32,0,9.26,9.26,97.87
2004,1,1,1,0,999.5,24.63,18.5,2.55,65.1,0,0,444.67,445.85,-1.77,0,NaN,0.02,12.95,NaN,0.32,0,9.26,9.26,96.97
2004,1,1,2,0,999.5,24.77,18.64,2.15,59.5,0,0,445.67,446.48,NaN,0,NaN,NaN,-3.32,8.57,0.18,0,5.25,5.25,96.9
2004,1,1,3,0,998.5,24.94,18.78,2.45,55.4,0,0,446.41,447.56,-2.3,0,NaN,0.04,9.63,6.59,0.28,0,16.22,16.22,96.56
2004,1,1,4,0,998.5,24.21,17.88,4.4,92.8,0,0,438.98,443.38,-4.4,4.2,NaN,NaN,NaN,-24.92,0.56,0,9.56,9.56,95.99
2004,1,1,5,0,998.5,23.73,17.76,3.5,101.8,0,0,437.42,440.86,-3.63,0.2,NaN,NaN,NaN,-1.06,0.47,0,9.56,9.56,98.16
2004,1,1,6,0,998.5,24.03,18.08,4.05,76.9,0,0,440.88,442.3,-1.44,2.6,NaN,NaN,NaN,-0.3,0.65,0,9.56,9.56,98.16
2004,1,1,7,0,999.5,23.44,17.5,4.65,64.9,1.09,NaN,436.54,438.12,-0.53,2.8,NaN,NaN,NaN,-2.17,0.76,0,9.56,9.56,98.5
2004,1,1,8,0,1000.5,23.54,17.63,1.95,96.9,33.97,3.72,433.63,438.75,25.13,0,-16.46,0.04,-1.42,3.71,0.22,7.26,9.56,2.3,98.73
2004,1,1,9,0,1001.5,23.93,18.06,0.75,211.2,93.71,9.58,432.94,440.62,76.44,0,NaN,NaN,-1.71,-1.31,0.15,9.27,9.56,0.29,98.9
2004,1,1,10,0,1002,24.67,18.73,0.3,206.8,239.25,21.8,419.64,442.04,195.05,0,16.87,0.11,-5.15,-5.04,0.13,18.62,9.56,-9.06,98.28

The data does not need to be gap-filled. We will use the average diurnal cycle with available data to build summaries at the monthly scale.

Reset session

Use this chunk to fully reset R.

# Unload all packages except for the R default ones
plist = names(sessionInfo()$otherPkgs)
if (length(plist) > 0){
   dummy = sapply(X=paste0("package:",plist),FUN=detach,character.only=TRUE,unload=TRUE)
}#end if (length(plist) > 0)


# Remove all variables
rm(list=ls())

# Reset warnings
options(warn=0)

# Close all plots
invisible(graphics.off())

# Clean up
invisible(gc())

Main settings

Required packages

Load some packages. By default, we skip the warning messages, but it may be a good idea to visualise them if it is the first time running.

isfine = c( data.table   = require(data.table  ,quietly=TRUE,warn.conflicts=FALSE)
          , lubridate    = require(lubridate   ,quietly=TRUE,warn.conflicts=FALSE)
          , ncdf4        = require(ncdf4       ,quietly=TRUE,warn.conflicts=FALSE)
          , patchwork    = require(patchwork   ,quietly=TRUE,warn.conflicts=FALSE)
          , purrr        = require(purrr       ,quietly=TRUE,warn.conflicts=FALSE)
          , RColorBrewer = require(RColorBrewer,quietly=TRUE,warn.conflicts=FALSE)
          , reshape2     = require(reshape2    ,quietly=TRUE,warn.conflicts=FALSE)
          , tidyverse    = require(tidyverse   ,quietly=TRUE,warn.conflicts=FALSE)
          , viridis      = require(viridis     ,quietly=TRUE,warn.conflicts=FALSE)
          )#end c
if (! all(isfine)){
   cat0(" List of required packages, and the success status loading them:\n")
   print(isfine)
   stop(" Some packages are missing and must be installed.")
}

Paths

Set paths and files for input and output.

main_path. The main path for processing data. The output path will be generated here.
input_file. The csv file containing the gap-filled tower/AWS data (full path).
output_path. The main output path for the site.

main_path   = file.path(path.expand("~"),"Documents","LocalData","TowerData","Tanguro","make_single_site")
input_file  = file.path(main_path,"input_csv","tb0_eddy-data_2008-2018.csv")
output_path = file.path(path.expand("~"),"Data","FATES_DataSets")

Author information

Provide information about yourself. This will be included in the netCDF headers. In case you do not want to specify any of them, set them to NA_character_

author_name. Person who is generating these files (typically it’s you).
author_email. Contact email (so people can reach out to you if problems arise).
datprov_name. Data provider name (who shared the met drivers with you).
datprov_email. Data provider email.
data_usage_notes. Brief data usage note message.

author_name  = "That Is You"
author_email = "yourself@somewhere.gov"
datprov_name = "Data Provider"
datprov_email = "amazing_collaborator@somewhere.edu"
data_usage_notes = paste0( " If you plan to use these data for any scientific analysis,"
                         , " you should contact the data provider first and ask for permission"
                         , " to use the data and check with them how to acknowledge their"
                         , " contribution (including, but not limited to offer co-authorship)."
                         )#end paste0

Tower information

Provide some basic information about the tower:

xid. Unique Tower ID.
site_desc. Site description (to be appended to the netCDF headers)
dat_version. Version of this data set.
site_lon. Site longitude (degrees east).
site_lat. Site latitude (degrees north).
site_alt. Site altitude (metres above sea level).
site_refhgt. Height reference for observations (e.g., the tower height above ground).
dxy. Intended grid size (we will generate a 1x1 pseudo-grid for FATES).
undef_in. A string to denote missing values in the input (csv) file. In case multiple flags exist, use the vector concatenation format (e.g., c("NA","NaN","-999.9"))
undef_out. A number to denote missing values in the output (NetCDF) file. This is needed by NetCDF.

xid         = "tanguroMT-BR"
site_desc   = "Tanguro, MT, Brazil"
dat_version = "1.0"
site_lon    = -52.41
site_lat    =   -13.08
site_alt    =  349.0
site_refhgt =   36.0
dxy         =   1.0
undef_in    = "NaN"
undef_out   = -9999.99

Useful variables for time conversion: * UTC offset, if data are provided in local time. For example, if the location is in the Western Hemisphere and the time zone is UTC-3, then utc_off should be -3 (Important. If the input data are already in UTC, then set utc_off to zero). * Year offset, if year is provided with two digits. For example, if the data are for 2003-2016 but the years are in 03-16 format, set year_off to 2000. If the year is already provided in four digits, set it to zero.

utc_off  = -4
year_off = 0

Next, we identify how time information is provided, by setting variable date_colname.
* If date_colname=NA_character_, then variable dateinfo must contain the names of the columns corresponding to year (year), month (month), day (day), hour (hour), and minute (min), respectively. If minute is absent, set it to NA_character_. * Otherwise, date_colname indicates the name of the column in the input data that has the time information (case sensitive). In this case, variable dateinfo should indicate the position of each piece of the time information, following the examples below. If minute is absent, set it to NA_integer_

Example of time string	year	month	day	hour	min
06/29/2021 14:30:22	3	1	2	4	5
06/29/2021 14:30	3	1	2	4	5
29/06/2021 14:30	3	2	1	4	5
2021-06-29 14:30	1	2	3	4	5
06/29/2021 14	3	1	2	4	NA

# Is date provided as multi-column (TRUE) or single string (FALSE)?
date_colname = NA_character_

# Time information.  
dateinfo = c( year = "year", month = "month", day = "day", hour = "hour", min = "min")

# Examples of time information:
# - If date_colname is NA_character_
# dateinfo = c( year = "Year", month = "Month", day = "Date", hour = "Hour", min = "Minute")
#
# - If date_colname is a column in the input data.
# dateinfo = c( year = 3, month = 1, day = 2, hour = 4, min = 5)

How to interpret the time stamp. Currently there are three options, as described below. To help illustrating the cases, we give examples of the beginning and end of average period of an example time stamp (2021-06-11 08:00) for an hourly average data where the high-frequency data were collected at 1-minute resolution.

imetavg	Description	Beginning	End
1	End of averaging period	2021-06-11 07:01	2021-06-11 07:00
2	Beginning of averaging period	2021-06-11 08:00	2021-06-11 08:59
3	Middle of averaging period	2021-06-11 07:31	2021-06-11 08:30

imetavg = 1

Input variables

Here we will provide information about the eddy covariance variables. Our goal is to have the variables names and units as defined in the table below, to be consistent with ELM/CLM units:

FATES variable	Description	Units
PBOT	Atmospheric pressure	Pa
TBOT	Air temperature	K
QBOT¹	Specific humidity	kg kg^-1
RBOT¹	Vapour mixing ratio	kg kg^-1
EBOT¹	Vapour pressure	Pa
RH¹	Relative humidity	%
WIND	Wind speed	m s^-1
FSDS	Incident solar irradiance	W m^-2
FLDS	Incident longwave irradiance	W m^-2
RAIN	Precipitation rate	kg m^-2 s^-1 (mm s^-1)
FSR	Outgoing short wave (solar) irradiance	W m^-2
FIRE	Outgoing long wave (thermal) irradiance	W m^-2
Rnet	Net irradiance (solar + thermal)	W m^-2
FSH	Sensible heat flux (eddy covariance)	W m^-2
FGR	Heat flux into ground	W m^-2
QEVTR²	Water vapour flux (eddy covariance)	kg m^-2 s^-1 (mm s^-1)
USTAR	Friction velocity	W m^-2
GPP	Gross Primary Productivity (GPP)	gC m^-2 s^-1
ER³	Ecosystem respiration	gC m^-2 s^-1
NEP	Net Ecosystem Productivity (NEP, + is sink)	gC m^-2 s^-1

Notes

Only one of the humidity variables needs be provided, but we recommend QBOT and RH as these variables are also output variables in ELM and CLM. The output file will always have the one humidity variable provided, and will attempt to compute specific humidity (QBOT), as long as other potentially dependent variables (pressure and/or temperature) are also provided.
Total water flux is not available as a variable in ELM or CLM. Instead, this can be calculated from the total evaporation from ground (QSOIL), evaporation from canopy surface (QVEGE), and transpiration (QVEGT). Make sure that your ELM and CLM output has these variables.
Ecosystem respiration is only available under some configurations in ELM or CLM. More often, it will be calculated from the total autotrophic respiration (AR) and total heterotrophic respiration (HR). Make sure that your ELM and CLM output has these variables.

First, we define some unit conversion constants for the chunk below

Pa_2_hPa  = 0.01      ; hPa_2_Pa  = 1. / Pa_2_hPa
K_2_degC  = -273.15   ; degC_2_K  =    - K_2_degC
kg_2_g    = 1000.     ; g_2_kg    = 1. / kg_2_g
sec_2_hr  = 1./3600.  ; hr_2_sec  = 1. / sec_2_hr
sec_2_day = 1./86400. ; day_2_sec = 1. / sec_2_day
frac_2_pc = 100.      ; pc_2_frac = 1. / frac_2_pc
umol_2_gC = 1.20107e-5; gC_2_umol = 1. / umol_2_gC

Then use the varinfo list to standardise the output. This is a tibble object (defined with tribble), containing the following elements:

vfates. Variable name from the table above (don’t change this value).
vlname. Long name for variable (don’t change this value).
vunits. Units for the variable (same as the table above, don’t change this value).
vinput. Variable name in the input file.
add0. Value to be added to the original input
mult. Value to be multiplied to the original input

For add0 and mult, except for trivial factors (add0=0. or mult=1), refrain from typing “magic numbers” here. Instead, use the unit conversion parameters described in the chunk above. In case the sought unit conversion is not available in the previous chunk, feel free to add more factors.

varinfo = tribble( ~vfates   , ~vlname                                  , ~vunits  , ~vinput     , ~add0   , ~mult
                 , "PBOT"    , "surface pressure at the tower"          , "Pa"     , "atm.prss"  , 0.      ,  hPa_2_Pa
                 , "TBOT"    , "temperature at the tower"               , "K"      , "atm.tmp"   , degC_2_K,  1.
                 , "QBOT"    , "specific humidity at the tower"         , "kg/kg"  , "atm.shv"   , 0.      ,  g_2_kg
                 , "WIND"    , "wind at the tower"                      , "m/s"    , "atm.vels"  , 0.      ,  1.
                 , "FSDS"    , "incident solar radiation at the tower"  , "W/m2"   , "rshort.in" , 0.      ,  1.
                 , "FLDS"    , "incident thermal radiation at the tower", "W/m2"   , "rlong.in"  , 0.      ,  1.
                 , "PRECTmms", "precipitation rate at the tower"        , "mm/s"   , "rain"      , 0.      ,  sec_2_hr
                 , "FSR"     , "outgoing shortwave (solar) irradiance"  , "W/m2"   , "rshort.out", 0.      ,  1.
                 , "FIRE"    , "outgoing long wave (thermal) irradiance", "W/m2"   , "rlong.out" , 0.      ,  1.
                 , "Rnet"    , "Net irradiance (solar + thermal)"       , "W/m2"   , "rnet"      , 0.      ,  1.
                 , "FSH"     , "Sensible heat flux"                     , "W/m2"   , "fsens"     , 0.      ,  1.
                 , "QEVTR"   , "Total water vapour flux"                , "mm/s"   , "fh2o"      , 0.      ,  sec_2_hr
                 , "USTAR"   , "Friction velocity"                      , "m/s"    , "ustar"     , 0.      ,  1.
                 , "GPP"     , "Gross primary productivity"             , "gC/m2/s", "gep"       , 0.      ,  umol_2_gC
                 , "ER"      , "Ecosystem respiration"                  , "gC/m2/s", "reco"      , 0.      ,  umol_2_gC
                 , "NEP"     , "Net Ecosystem productivity (+ is sink)" , "gC/m2/s", "nee"       , 0.      , -umol_2_gC
                 )#end tribble

This concludes the initial settings. From this point on, you may not need to change anything, unless you are debugging or adding new features.

Data processing

Met driver loading

Read in the csv file.

eddy_orig = read.csv( file             = input_file
                    , header           = TRUE
                    , comment.char     = ""
                    , na.strings       = undef_in 
                    , stringsAsFactors = FALSE
                    )#end read.csv
eddy_orig = tibble(eddy_orig)

Standardise time. It must be a vector of type lubritime.

if (is.na(date_colname)){
   # Create minute column in case it is missing.
   if (is.na(dateinfo[["min"]])){
      dateinfo[["min"]] = "Minute"
      eddy_orig$Minute  = rep(x=0,times=nrow(eddy_orig))
   }#end if (is.na(dateinfo[["min"]]))
   
   # Find date variables and rename them.
   idx                   = match(dateinfo,names(eddy_orig))
   names(eddy_orig)[idx] = names(dateinfo)

}else{
   # Split the time label
   decomp_time = lapply( X   = tstrsplit( eddy_orig[[date_colname]],"[^0-9]")
                       , FUN = as.numeric
                       )#end lapply

   # In case minute is missing, append a vector with zeroes.
   if (is.na(dateinfo[["min"]])){
      decomp_time       = c(decomp_time,list(0*decomp_time[[1]]))
      dateinfo[["min"]] = length(decomp_time)
   }#end if (is.na(dateinfo[["min"]]))

   # Select list elements that are useful for time.
   decomp_time        = decomp_time[dateinfo]
   names(decomp_time) = names(dateinfo)
   decomp_time        = as.data.table(decomp_time)

   # Merge time information to the main tibble.
   eddy_orig = as_tibble(cbind(eddy_orig,decomp_time))
   
   
}#end if (is.na(date_colname) && is.na(dateinfo[["min"]]))


# Create lubridate-friendly time stamp
eddy_orig = eddy_orig %>%
            mutate( year   = year + year_off
                  , tstamp = make_datetime( year   = year
                                          , month  = month
                                          , day    = day
                                          , hour   = hour
                                          , min    = min
                                          , tz     = "UTC"
                                          )#end make_datetime
                  )#end mutate

Time standardisation

Find the time difference, and assess the first and last year with full data.

#---- Time interval between observations.
dtstamp = mean(diff(eddy_orig$tstamp))

#---- Number of observations per day.
nperday = time_length(make_difftime(day=1),unit="hour") / time_length(x=dtstamp,unit="hour")

#---- Find which years have complete data
datsumm = group_by (.data=eddy_orig,year,month)                                             %>% 
          summarise( count  = n())                                                          %>%
          ungroup  ()                                                                       %>%
          mutate   (expect = days_in_month(make_datetime(year=year,month=month)) * nperday) %>%
          group_by (year)                                                                   %>%
          summarise(fine = all(count == expect))                                            %>%
          ungroup  ()

## `summarise()` has grouped output by 'year'. You can override using the `.groups` argument.

#---- Find first and last complete years
allfine = all(datsumm$fine)
yeara   = min(datsumm$year[datsumm$fine])
yearz   = max(datsumm$year[datsumm$fine])

#---- Find and last expected time
tstampa = make_datetime( year=yeara  ,month=1,day=1)
tstampz = make_datetime( year=yearz+1,month=1,day=1)

Correct data to be in UTC whilst accounting for the time stamp. For FATES, the time stamp should always correspond to the middle of the averaging window.

if (imetavg == 1){
  dt.utc = make_difftime(hour=-utc_off) - 0.5 * dtstamp
}else if (imetavg == 2){
  dt.utc = make_difftime(hour=-utc_off) + 0.5 * dtstamp
}else if (imetavg == 3){
  dt.utc = make_difftime(hour=-utc_off)
}#end if (imetavg)

eddy_data = eddy_orig %>% 
            mutate(tstamp = tstamp + dt.utc) %>%
            select(!c(year,month,day,hour,min))

With the time zone shift, we may have a few data points off at the beginning or at the end.
As a simple solution, we just shift these hours to the beginning or the end of the simulation, just so every year/month has all the data.

eddy_data = eddy_data %>% 
            mutate( tstamp  = case_when( tstamp < tstampa ~ tstampz + difftime(tstamp,tstampa)
                                       , tstamp > tstampz ~ tstampa + difftime(tstamp,tstampz)
                                       , TRUE             ~ tstamp
                                       )#end when
                  ) %>% arrange(tstamp)

Unit standardisation for output

Simplify output so variables are in the correct units, and only variables needed for the output remain in the output.

eddy_fates = eddy_data                        %>% 
             select(c(tstamp,varinfo$vinput)) %>%
             rename_at(vars(c("tstamp",varinfo$vinput)), ~ c("time",varinfo$vfates))

Fix units for all variables:

# Create a simple addition tibble
add0 = varinfo %>% mutate(id=1) %>% select(c(id,vfates,add0)) %>% 
       spread(vfates,add0) %>% select(-id) %>% relocate(varinfo$vfates)             

# Create a simple multiplication tibble
mult = varinfo %>% mutate(id=1) %>% select(c(id,vfates,mult)) %>% 
       spread(vfates,mult) %>% select (! id) %>% relocate(varinfo$vfates)          

# Fix units. We temporarily remove time from the tibble, so we can add and multiply.
tstamp     = eddy_fates$time
eddy_fates = eddy_fates %>% select(! time)
eddy_fates = as_tibble(map2(eddy_fates,add0,function(x,y) if (is.numeric(x)){x+y}else{x}))
eddy_fates = as_tibble(map2(eddy_fates,mult,function(x,y) if (is.numeric(x)){x*y}else{x}))
eddy_fates = eddy_fates %>% mutate( time = tstamp) %>% relocate(c("time",varinfo$vfates))

Derive humidity

This step is only needed if specific humidity was not provided. In this case, we derive specific humidity (\(q_{v}\)) [ \(\mathrm{kg_{v}\,kg_{a}^{-1}}\)] using one the following relationships (in this order of preference), using the fact that specific humidity is the ratio of mass (or partial density) of water vapour \(\rho_{v}\) [\(\mathrm{kg_{v}\,m^{-3}}\)] and total mass (or density) of air (\(\rho_{a}\)) [\(\mathrm{kg_{a}\,m^{-3}}\)].

Mixing ratio \(r_v\) [ \(\mathrm{kg_{v}\,kg_{d}^{-1}}\)] is defined as the ratio between partial density of water vapour \(\rho_{v}\) and partial density of dry air (\(\rho_{d}\)) [\(\mathrm{kg_{d}\,m^{-3}}\)]. Assuming that \(\rho_{a} = \rho_{v} + \rho_{d}\), we can relate mixing ratio and specific humidity as: \[\begin{align} & q_{v} = \frac{r_{v}}{r_{v}+1} \end{align}.\]
To convert from water vapour pressure \(e_{v}\) [\(\mathrm{Pa}\)] to \(q_v\), we use the ideal gas equation of a mixture of dry air and water vapour , and account for the difference in molar masses of water vapour (\(\mathcal{M}_v = 1.802 \times 10^{-2}\,\mathrm{kg\,mol^{-1}}\)) and dry air (\(\mathcal{M}_d = 2.897 \times 10^{-2}\,\mathrm{kg\,mol^{-1}}\)): \[\begin{align} & q_{v} = \frac{\varepsilon{}\,e_{v}}{p - \left(1 - \varepsilon{}\right)\,e_{v}} \end{align},\] where \(p\) [\(\mathrm{Pa}\)] is the total air pressure and \(\varepsilon = \mathcal{M}_{v} / \mathcal{M}_{d}\).
Relative humidity (\(\mathrm{RH}\)) [\(\%\)] is defined as \(\mathrm{RH} = 100 \, e_{v}/e_{v}^{\star}\), where \(e_{v}^{\star}\) is the vapour pressure at saturation, which depends on temperature \(T\) [\(\mathrm{K}\)]. To derive specific humidity, we use (Bolton 1980) empirical equation for \(e_{v}^{\star}\) to derive \(e_{v}\): \[\begin{align} & e_{v} = 0.01 \, \mathrm{RH} \,e_{0}\,\exp{\left[ \frac{17.67\,\left( T - T_{0}\right)}{T - 29.65} \right]} \end{align},\] where \(e_{0} = 611.66\,\mathrm{Pa}\) is the vapour pressure of saturation at \(T_{0}=273.15\,\mathrm{K}\). We then convert \(e_{v}\) to \(q_{v}\) using the same equation described above.

#---~---
# Define some thermodynamic constants.
#---~---
# Molar mass of water vapour [kg/mol]
mm_h2o  = 0.01801505
# Molar mass of dry air [kg/mol]
mm_dry  = 0.02897
# Molar mass ratio
eps_mol = mm_h2o / mm_dry
# Saturation vapour pressure at 273.15K [Pa]
esat_0C = 611.65685464


if ("QBOT" %in% names(eddy_fates)){
   # Specific humidity already in the meteorological driver, find relative humidity.
   cat(" - Specific humidity included in the input data. No action needed.\n")
   add_qbot = FALSE
}else if(all(c("RH","PBOT","TBOT") %in% names(eddy_fates))){
   # Find specific humidity from vapour pressure
   eddy_fates = eddy_fates %>%
                mutate( ESAT = esat_0C * exp( 17.67 * (TBOT - degC_2_K) / (TBOT - 29.65))
                      , EBOT = pc_2_frac * RH * ESAT
                      , QBOT = eps_mol * EBOT / (PBOT - (1. - eps_mol) * EBOT)
                      ) %>% select(! c(ESAT,EBOT))
   add_qbot   = TRUE
}else if (all(c("EBOT","PBOT") %in% names(eddy_fates))){
   # Find specific humidity from vapour pressure
   eddy_fates = eddy_fates %>%
                mutate( QBOT = eps_mol * EBOT / (PBOT - (1. - eps_mol) * EBOT) )
   add_qbot   = TRUE
}else if ("RBOT" %in% names(eddy_fates)){
   # Find specific humidity from mixing ratio
   eddy_fates = eddy_fates %>%
               mutate( QBOT = RBOT / (1. + RBOT) )
   add_qbot   = TRUE
}else{
   # It is not possible to get QBOT from the provided variables.
   add_qbot   = FALSE
}#end if ("QBOT" %in% names(eddy_fates))

##  - Specific humidity included in the input data. No action needed.

# Update or insert specific humidity to the list of output variables.
if (add_qbot){
   varinfo = varinfo %>%
             rows_upsert( tibble( vfates = "QBOT"
                                , vlname = "specific humidity at the tower"
                                , vunits = "kg/kg"
                                )#end tibble
                        )#end rows_upsert
}#end if (add_qbot)

Data output

Create paths for output, using FATES convention.

# Labels for describing the data set.
ymd_now = today(tzone = "UTC")
ymd_lab = sprintf("%4.4i%2.2i%2.2i",year(ymd_now),month(ymd_now),day(ymd_now))

# Tag specific for this site, grid, and version (used for file and path names).
site_tag  = paste0("1x1pt-",xid,"_v",dat_version,"_c",ymd_lab)


# Path to where we will write the drivers.
eddy_base = paste0(site_tag,"_eddy-summ.nc")
eddy_path = file.path(output_path,site_tag)
eddy_file = file.path(eddy_path,eddy_base)

# Create directory
dummy = dir.create(path=eddy_path  ,showWarnings=FALSE,recursive=TRUE)

Group data by month and year. Because we may have missing data, we take first the mean diurnal cycle for each month and year, then find the monthly averages. This reduces the risk of biases in the monthly average estimate due to frequent missing data in one time of the day, for example.

# Summarise data by hour month and year, then summary by month and year.
eddy_emean = eddy_fates                                                      %>% 
             mutate( year = year(time), month=month(time), hour=hour(time))  %>%
             select(! c(time))                                               %>%
             group_by( year,month,hour)                                      %>%
             summarise_all( ~mean(.,na.rm=TRUE))                             %>%
             ungroup()                                                       %>%
             select(! c(hour))                                               %>%
             group_by( year,month)                                           %>%
             summarise_all( ~mean(.,na.rm=FALSE))                            %>%
             ungroup()                                                       %>%
             mutate( time = make_datetime(year=year,month=month,tz="UTC"))   %>%
             select(! c(year,month))                                         %>% 
             select(c("time",varinfo$vfates))

# Set the number of times to be written to the output
nemean = nrow(eddy_emean)

Generate edge information for the single data point.

# Find precision for the coordinates (typically 1% of the grid size).
outprec = -floor(log10(dxy*0.01))

# Standardise coordinates (and make sure latitude cannot exceed the poles).
outlon  = round(site_lon,outprec) %% 360.
outlat  = max(-90,min(90-0.5*dxy,round(site_lat,outprec)))

# Find edges.
edge_w = (outlon - 0.5 * dxy) %% 360.
edge_e = (outlon + 0.5 * dxy) %% 360.
edge_s = outlat - 0.5 * dxy
edge_n = outlat + 0.5 * dxy

Define the template for the global attributes.

# Define the code developer information (indirect way so the email is not visible).
developer_name  = c( 111L, 103L, 110L, 111L,  76L,  32L, 115L, 111L,  99L, 114L,  97L,  77L)
developer_email = c( 118L, 111L, 103L,  46L, 108L,  98L, 108L,  64L, 111L, 103L, 110L, 111L
                   , 108L, 109L)


# Define the template.  We will update the title in each time step.
att_template = list( title          = "To be replaced when looping through months"
                   , version        = dat_version
                   , date_created   = paste0(as.character(now(tzone="UTC")), "UTC")
                   , source_code    = "make_fates_tower_summary.Rmd"
                   , code_notes     = "Eddy covariance summary for benchmarking ELM-FATES and CLM-FATES"
                   , code_developer = paste0( intToUtf8(rev(developer_name))
                                            ," <"
                                            , intToUtf8(rev(developer_email))
                                            ,">"
                                            )#end paste0
                   , file_author    = paste0(author_name," <",author_email,">")
                   , data_provider  = paste0(datprov_name," <",datprov_email,">")
                   , usage_notes    = data_usage_notes
                   )#end list

Create a single NetCDF file with all the averages by month and year. We do not save the monthly average across all years because simulations may or may not overlap with all the observation months and years.

# Extract time, and turn it into a difference in months since the first time
year_first  = year (eddy_emean$time[1])
month_first = month(eddy_emean$time[1])
time_first  = eddy_emean$time[1]

# Extract time, and turn it into a difference in months
tsince = as.numeric(difftime(eddy_emean$time,time_first,units="days"))

# In case file exists, it will be re-created.
cat(" + Write averages by month and year to ",eddy_base,".\n",sep="")
if (file.exists(eddy_file)) file.remove(eddy_file)

# Add dimensions: longitude, latitude, and time. We do not automatically create the 
# dimension variable for time because R would create it in double precision.  Instead,
# we append variable time manually.
xx  = ncdim_def( name="lon"   ,units="",vals=1L               ,create_dimvar=FALSE)
yy  = ncdim_def( name="lat"   ,units="",vals=1L               ,create_dimvar=FALSE)
tt  = ncdim_def( name="time"  ,units="",vals=seq_along(tsince),create_dimvar=FALSE)
ss  = ncdim_def( name="scalar",units="",vals=1L               ,create_dimvar=FALSE)

# List of dimensions, useful for setting variables.   
nc_xy  = list   (xx,yy)
nc_xyt = list(xx,yy,tt)
nc_t   = list      (tt)
nc_s   = list(ss)
xy     = c(1,1)
xyt    = c(1,1,nemean)

# Start list with variables. First we put the coordinates
nc_vlist        = list()
nc_vlist$LONGXY = ncvar_def( name     = "LONGXY"
                           , units    = "degrees_east"
                           , dim      = nc_xy
                           , missval  = undef_out
                           , longname = "longitude"
                           )#end ncvar_def
nc_vlist$LATIXY = ncvar_def( name     = "LATIXY"
                           , units    = "degrees_north"
                           , dim      = nc_xy
                           , missval  = undef_out
                           , longname = "latitude"
                           )#end ncvar_def
nc_vlist$EDGEW  = ncvar_def( name     = "EDGEW"
                           , units    = "degrees_east"
                           , dim      = nc_s
                           , missval  = undef_out
                           , longname = "western edge in atmospheric data"
                           )#end ncvar_def
nc_vlist$EDGEE  = ncvar_def( name     = "EDGEE"
                           , units    = "degrees_east"
                           , dim      = nc_s
                           , missval  = undef_out
                           , longname = "eastern edge in atmospheric data"
                           )#end ncvar_def
nc_vlist$EDGES  = ncvar_def( name     = "EDGES"
                           , units    = "degrees_north"
                           , dim      = nc_s
                           , missval  = undef_out
                           , longname = "southern edge in atmospheric data"
                           )#end ncvar_def
nc_vlist$EDGEN  = ncvar_def( name     = "EDGEN"
                           , units    = "degrees_north"
                           , dim      = nc_s
                           , missval  = undef_out
                           , longname = "northern edge in atmospheric data"
                           )#end ncvar_def
nc_vlist$time   = ncvar_def( name     = "time"
                           , units    = paste0( "days since ",as.character(time_first)
                                              , " 00:00:00 UTC"
                                              )#end paste0
                           , dim      = nc_t
                           , missval  = undef_out
                           , longname = "observation time"
                           )#end ncvar_def

# Loop through FATES met drivers, add them   
for (v in seq_along(varinfo[[1]])){
   # Handy shorter names
   v_vfates = varinfo$vfates[v]
   v_vlname = varinfo$vlname[v]
   v_vunits = varinfo$vunits[v]

   #Add variable information
   nc_vlist[[v_vfates]] = ncvar_def( name     = v_vfates
                                   , units    = v_vunits
                                   , dim      = nc_xyt
                                   , missval  = undef_out
                                   , longname = v_vlname
                                   )#end ncvar_def
}#end for (v in seq_along(varinfo[[1]]))

# Create file
nc_conn = nc_create(filename=eddy_file,vars=nc_vlist,verbose=FALSE)

#---~---
# Put coordinates, tower height and attributes to the netcdf
#---~---
# Longitude, append time-invariant tag
dummy = ncvar_put(nc=nc_conn,varid="LONGXY",vals=array(data=outlon     ,dim=xy))
dummy = ncatt_put(nc=nc_conn,varid="LONGXY",attname="mode"    ,attval="time-invariant")
# Latitude, append time-invariant tag
dummy = ncvar_put(nc=nc_conn,varid="LATIXY",vals=array(data=outlat     ,dim=xy))
dummy = ncatt_put(nc=nc_conn,varid="LATIXY",attname="mode"    ,attval="time-invariant")
# Western edge, append time-invariant tag
dummy = ncvar_put(nc=nc_conn,varid="EDGEW" ,vals=edge_w)
dummy = ncatt_put(nc=nc_conn,varid="EDGEW" ,attname="mode"    ,attval="time-invariant")
# Eastern edge, append time-invariant tag
dummy = ncvar_put(nc=nc_conn,varid="EDGEE" ,vals=edge_e)
dummy = ncatt_put(nc=nc_conn,varid="EDGEE" ,attname="mode"    ,attval="time-invariant")
# Southern edge, append time-invariant tag
dummy = ncvar_put(nc=nc_conn,varid="EDGES" ,vals=edge_s)
dummy = ncatt_put(nc=nc_conn,varid="EDGES" ,attname="mode"    ,attval="time-invariant")
# Northern edge, append time-invariant tag
dummy = ncvar_put(nc=nc_conn,varid="EDGEN" ,vals=edge_n)
dummy = ncatt_put(nc=nc_conn,varid="EDGEN" ,attname="mode"    ,attval="time-invariant")
# Time, append calendar type.
dummy = ncvar_put(nc=nc_conn,varid="time"  ,vals=tsince)
dummy = ncatt_put(nc=nc_conn,varid="time"  ,attname="calendar",attval="gregorian")
#---~---

# Put variables to the netcdf
for (v in seq_along(varinfo[[1]])){
   # Handy shorter names
   v_vfates = varinfo$vfates[v]
   v_vlname = varinfo$vlname[v]
   v_vunits = varinfo$vunits[v]

   #Add variable information
   dummy = ncvar_put( nc    = nc_conn
                    , varid = v_vfates
                    , vals  = array(data=eddy_emean[[v_vfates]],dim=xyt)
                    )#end ncvar_put

   #Add attribute to highlight this is time-dependent
   dummy = ncatt_put( nc      = nc_conn
                    , varid   = v_vfates
                    , attname = "mode"
                    , attval  = "time-dependent")
}#end for (v in seq_along(varinfo[[1]]))

# Add title specific for this month/year.
nc_title   = paste0( "Averages by month and year for ",site_desc)
att_global = modifyList( x = att_template, val = list( title = nc_title ))

   
# Loop through global attributes
for (l in seq_along(att_global)){
   # Current attribute information
   att_name  = names(att_global)[l]
   att_value = att_global[[l]]

   # Add attribute 
   dummy = ncatt_put(nc=nc_conn,varid=0,attname=att_name,attval=att_value)
}#end for (l in seq_along(att_global))


# Close the file
dummy = nc_close(nc_conn)

References

Bolton, David. 1980. “The Computation of Equivalent Potential Temperature.” Mon. Wea. Rev. https://doi.org/10.1175/1520-0493(1980)108<1046:TCOEPT>2.0.CO;2.

Monthly eddy covariance generator

Marcos Longo

14-Jul-2021