Main script
Note: Code changes beyond this point are only needed if you are developing the notebook.
Initial settings.
First, we load the utility scripts that provide the helper functions and settings used throughout this workflow.
# Load the shared utility scripts from util_path (presumably this defines the
# helpers used below, e.g. cat0, commonest, length.unique, is.valid.data --
# TODO confirm against RUtils). chdir=TRUE so relative paths inside
# load.everything.r resolve against util_path rather than the working directory.
source(file.path(util_path,"load.everything.r"),chdir=TRUE)
## + Load scripts from /Users/marcoslongo/Dropbox/Home/Data/TraitAllom_Workflow/RUtils.
Set some environment variables to define paths for shell script chunk.
# Export the R-side configuration to environment variables so the shell chunk
# below can see the same paths and overwrite flags. The logical flags are
# lower-cased ("true"/"false") to match shell conventions.
dummy = Sys.setenv( "MAIN_PATH"     = main_path
                  , "ORIG_PATH"     = orig_path
                  , "QUOTE_PATH"    = quote_path
                  , "AUTHOR_PATH"   = author_path
                  , "OWRITE_QUOTE"  = tolower(as.character(owrite_quote))
                  , "OWRITE_AUTHOR" = tolower(as.character(owrite_author))
                  )#end Sys.setenv
The pre-processor kit uses a Fortran program, SplitAuthor.f90. The Fortran file is compiled by the
script itself; just make sure that SplitAuthor.f90 is located in main_path (exported as MAIN_PATH above).
# Load user settings (gfortran and other tools may be configured there).
if [[ -s "${HOME}/.bashrc"       ]]; then . "${HOME}/.bashrc"      ; fi
if [[ -s "${HOME}/.profile"      ]]; then . "${HOME}/.profile"     ; fi
if [[ -s "${HOME}/.bash_profile" ]]; then . "${HOME}/.bash_profile"; fi
# Set Fortran compiler (feel free to add compilation options).
export FC="$(which gfortran) -O3"
# Set language to plain C so non-standard characters do not break text tools.
export LANG=C
export LC_ALL=C
# Create output path for quoted csv files (quoted in case the path has spaces).
mkdir -p "${QUOTE_PATH}"
# Compile executable. FC is deliberately left unquoted: it carries the
# compiler name plus its flags and must undergo word splitting.
${FC} -o "${MAIN_PATH}/SplitAuthor.x" "${MAIN_PATH}/SplitAuthor.f90"
# List all files in the path with original files. Escape the dot so ".txt$"
# matches a literal extension (an unescaped "." would match any character).
base_list=$(/bin/ls -1 "${ORIG_PATH}" | grep -i "\.txt$")
# Loop through every file to make the quoted CSV.
for base_txt in ${base_list}
do
   # Set input and output files based on the base name (literal-dot pattern).
   base_out=$(echo "${base_txt}" | sed s@"\.txt$"@"_quoted.csv"@g)
   file_txt="${ORIG_PATH}/${base_txt}"
   file_out="${QUOTE_PATH}/${base_out}"
   # Turn file into a quoted comma-separated value file, in case it doesn't
   # exist or the user wants it overwritten. Compare the flag explicitly so an
   # unset OWRITE_QUOTE does not cause a syntax error.
   if [[ ! -s "${file_out}" ]] || [[ "${OWRITE_QUOTE}" == "true" ]]
   then
      echo " + Convert ${base_txt} to ${base_out}:"
      /bin/rm -f "${file_out}" "${file_out}.bck"
      /bin/cp -f "${file_txt}" "${file_out}"
      # The sed order below matters: commas and double quotes are retired
      # first, then tabs become field separators with surrounding quotes.
      echo " - Replace commas with semi-colons... "
      sed -i".bck" s@","@";"@g "${file_out}"
      echo " - Replace double quotes with single quotes... "
      sed -i".bck" s@"\""@"\'"@g "${file_out}"
      echo " - Remove DOS carriage return..."
      sed -i".bck" s@"\r$"@""@g "${file_out}"
      echo " - Replace tabs at the end of the line with quote... "
      sed -i".bck" s@"\t$"@"\""@g "${file_out}"
      echo " - Replace tabs with commas, and add quotes... "
      sed -i".bck" s@"\t"@"\",\""@g "${file_out}"
      echo " - Append quotes to the beginning of the line... "
      sed -i".bck" s@"^"@"\""@g "${file_out}"
      echo " - Done!"
      /bin/rm -f "${file_out}.bck"
   else
      echo " + Skip conversion for ${base_txt}; file ${base_out} already exists."
   fi
done
# The script cannot check files by author one by one. If owrite_author is true,
# it will delete the existing path and re-create it. Compare the flag
# explicitly so an unset OWRITE_AUTHOR does not cause a syntax error.
if [[ ! -d "${AUTHOR_PATH}" ]] || [[ "${OWRITE_AUTHOR}" == "true" ]]
then
   # Remove current directory and create a new one.
   /bin/rm -fr "${AUTHOR_PATH}"
   mkdir -p "${AUTHOR_PATH}"
   # Find all files to process (escaped dot: match the literal extension).
   quote_list=$(/bin/ls -1 "${QUOTE_PATH}" | grep -i "_quoted\.csv$")
   for quote_base in ${quote_list}
   do
      quote_file="${QUOTE_PATH}/${quote_base}"
      echo " + Split file ${quote_base} by author."
      # Run inside a subshell so the cd does not leak into the main shell.
      (cd "${AUTHOR_PATH}"; "${MAIN_PATH}/SplitAuthor.x" "${quote_file}")
   done
else
   # Skip creating files by author as they already exist.
   echo " + Skip splitting files by author. They already exist."
fi
Make some system-specific configurations.
# Select the iconv encoding label for UTF-8. Mac OS ("Darwin") needs the
# Mac-specific label; every other platform uses the generic one.
UTF8_System = if (Sys.info()["sysname"] %in% "Darwin") "UTF-8-MAC" else "UTF-8"
Define files and paths for input and output. We also create the output paths.
# Suffix appended to model-fitting output files: realm and life class.
use_suffix = paste(use_realm,use_lifeclass,sep="_")
# RData object file names for the trait and ancillary data, SMA (trait and
# photosynthesis), and allometry models.
Rdata_LookUp         = file.path(rdata_path,"LookUp_AllRegions_AllPlants.RData")
Rdata_RawTRY         = file.path(rdata_path,"RawTRY_AllRegions_AllPlants.RData")
Rdata_ExtraTRY       = file.path(rdata_path,sprintf("ExtraTRY_%s_AllPlants.RData"      ,use_realm ))
Rdata_GeolocationTRY = file.path(rdata_path,sprintf("GeolocationTRY_%s_AllPlants.RData",use_realm ))
Rdata_GeodataTRY     = file.path(rdata_path,sprintf("GeodataTRY_%s_AllPlants.RData"    ,use_realm ))
Rdata_TaxonomyTRY    = file.path(rdata_path,sprintf("TaxonomyTRY_%s.RData"             ,use_suffix))
Rdata_TidyTRY        = file.path(rdata_path,sprintf("TidyTRY_%s.RData"                 ,use_suffix))
# Make sure every output directory exists (create recursively, quietly).
for (out_path in c(lookup_path,rdata_path,uniq_trait_path,uniq_ancil_path)){
   dummy = dir.create(path=out_path,showWarnings=FALSE,recursive=TRUE)
}#end for (out_path in ...)
# Flags for the steps when loading the TRY data base; they help splitting the
# work into smaller steps.
load_ancil = TRUE
The following chunk reads the lists with ancillary and trait
settings, and makes sure that the initial settings are acceptable.
Specifically, it checks that every trait flagged for the SMA,
allometry, or photosynthesis analyses has a numeric type.
# Check whether or not this is a `dry run` (just to produce look-up tables).
# A dry run happens whenever the trait or the ancillary selection file is
# missing.
is_dry_run = is.na(ancil_file) || is.na(trait_file)
if (is_dry_run){
   cat0(" + Dry run, this script will only produce look-up tables for traits and ancillary data.")
}else{
   # Read in the list of traits (and save number of traits).
   cat0(" + Read list of traits (",basename(trait_file),").")
   try_trait   = read_csv(file=trait_file,col_types="ciccccclclllllllcc")
   n_try_trait = nrow(try_trait)
   # Sort traits by name, then move supporting variables to the end so they
   # are loaded after the main variables.
   o         = order(try_trait$Name)
   try_trait = try_trait[o,,drop=FALSE]
   o         = c(which(is.na(try_trait$Supporting)),which(! is.na(try_trait$Supporting)))
   try_trait = try_trait[o,,drop=FALSE]
   # Read in the list of ancillary variables (and save number of ancillary
   # variables). Same ordering: supporting variables go last.
   cat0(" + Read list of ancillary variables (",basename(ancil_file),").")
   try_ancil = read_csv(file=ancil_file,col_types="cicclccccllc")
   o         = order(try_ancil$Name)
   try_ancil = try_ancil[o,,drop=FALSE]
   o         = c(which(is.na(try_ancil$Supporting)),which(! is.na(try_ancil$Supporting)))
   try_ancil = try_ancil[o,,drop=FALSE]
   n_try_ancil = nrow(try_ancil)
   # SMA, allometry and photosynthesis analyses only make sense for numeric
   # traits. Flag any trait that requests these analyses with another type.
   sma_problem   = try_trait$SMA   & (! ( try_trait$Type %in% "numeric") )
   allom_problem = try_trait$Allom & (! ( try_trait$Type %in% "numeric") )
   photo_problem = try_trait$Photo & (! ( try_trait$Type %in% "numeric") )
   if (any(sma_problem) || any(allom_problem) || any(photo_problem)){
      cat0(" The following traits are inconsistent. Options \"sma\", \"allom\" and \"photo\" are restricted to numeric traits.")
      # Label each offending trait with the first analysis that flags it.
      problem_trait = try_trait %>%
         mutate( Problem = case_when( sma_problem   ~ "SMA"
                                    , allom_problem ~ "Allometry"
                                    , photo_problem ~ "Photosynthesis"
                                    , TRUE          ~ NA_character_
                                    )#end case_when
               ) %>%
         filter(! is.na(Problem)) %>%
         select(Name,TraitID,Desc,Type,SMA,Allom,Problem)
      # Use print(), not cat(): cat() cannot display a tibble and would fail
      # right when the diagnostic is needed.
      print(problem_trait)
      cat0(" Fix settings in file ",trait_file,".")
      stop(" Inconsistent trait settings.")
   }#end if (any(sma_problem) || any(allom_problem) || any(photo_problem))
}#end if (is_dry_run)
## + Read list of traits (TRY_TraitList.csv).
## + Read list of ancillary variables (TRY_AncillaryList.csv).
Either retrieve or load look-up table with units and additional information on the TRY data base. Normally this needs to be done only once, unless a new version of data request is being used.
# Either reload the look-up tables from the RData object, or rebuild them by
# scanning every author CSV file. Rebuilding is only needed once per data
# request version.
if (reload_lookup && file.exists(Rdata_LookUp) && (! is_dry_run )){
   # Load previously built look-up tables.
   cat0(" + Reload look-up table from ",basename(Rdata_LookUp),".")
   dummy = load(Rdata_LookUp)
}else{
   # Find the list of all files to read.
   try_file_list = list.files(author_path,pattern="\\.csv$")
   # Initialise the trait and ancillary templates. These define the column
   # types and guarantee a well-formed (empty) result when no file is found.
   cat0(" + Initialise look-up table of traits and ancillary variables.")
   AncilAll = tibble( DataID    = integer  (0L)
                    , Name      = character(0L)
                    , OrigValid = numeric  (0L)
                    , OrigUnits = character(0L)
                    , StdValid  = numeric  (0L)
                    , StdUnits  = character(0L)
                    )#end tibble
   TraitAll = tibble( TraitID   = integer  (0L)
                    , Name      = character(0L)
                    , OrigValid = numeric  (0L)
                    , OrigUnits = character(0L)
                    , StdValid  = numeric  (0L)
                    , StdUnits  = character(0L)
                    )#end tibble
   # Accumulate one summary tibble per file in lists and bind them once after
   # the loop. This avoids the quadratic copying caused by growing a tibble
   # with rbind() at every iteration.
   TraitList = list(TraitAll)
   AncilList = list(AncilAll)
   # Go through every file, and retrieve information.
   for ( f in seq_along(try_file_list)){
      try_base = try_file_list[f]
      try_file = file.path(author_path,try_base)
      # Load author file. All 28 columns are read as character; types are
      # coerced below.
      cat0(" - Read file ",try_base,".")
      TRYdbNow = read_csv( file      = try_file
                         , col_types = paste(rep("c",times=28L),collapse="")
                         )#end read_csv
      # Summarise traits (rows with a TraitID).
      TraitList[[f+1L]] = TRYdbNow %>%
         filter(! is.na(TraitID)) %>%
         mutate( TraitID   = as.integer(TraitID)
               , Name      = TraitName
               , OrigValid = is.valid.data(OrigValueStr,qq.rm=TRUE)
               , OrigUnits = OrigUnitStr
               , StdValid  = is.valid.data(StdValue    ,qq.rm=TRUE)
               , StdUnits  = UnitName
               ) %>%
         select(c(TraitID,Name,OrigValid,OrigUnits,StdValid,StdUnits))
      # Summarise ancillary data (rows without a TraitID).
      AncilList[[f+1L]] = TRYdbNow %>%
         filter(is.na(TraitID)) %>%
         mutate( DataID    = as.integer(DataID)
               , Name      = DataName
               , OrigValid = is.valid.data(OrigValueStr,qq.rm=TRUE)
               , OrigUnits = OrigUnitStr
               , StdValid  = is.valid.data(StdValue    ,qq.rm=TRUE)
               , StdUnits  = UnitName
               ) %>%
         select(c(DataID,Name,OrigValid,OrigUnits,StdValid,StdUnits))
   }#end for ( f in seq_along(try_file_list))
   # Combine every per-file summary into a single tibble.
   TraitAll = bind_rows(TraitList)
   AncilAll = bind_rows(AncilList)
   # Summarise look-up table for traits, using the ID.
   cat0(" - Summarise trait look-up table.")
   TraitLUT = TraitAll %>%
      group_by(TraitID) %>%
      summarise( CntUniqName  = length.unique(Name)
               , CntName      = length(Name)
               , CntOrigUnits = length.unique(OrigUnits)
               , CntStdUnits  = length.unique(StdUnits)
               , Name         = commonest(Name,na.rm=TRUE)
               , OrigValid    = sum(OrigValid,na.rm=TRUE)
               , OrigUnits    = commonest(OrigUnits,na.rm=TRUE)
               , StdValid     = sum(StdValid,na.rm=TRUE)
               , StdUnits     = commonest(StdUnits,na.rm=TRUE) ) %>%
      ungroup() %>%
      select(c(TraitID,Name,CntUniqName,CntName,OrigValid,OrigUnits,CntOrigUnits,StdValid,StdUnits,CntStdUnits))
   # Summarise ancillary data, most frequently used entries first.
   cat0(" - Summarise ancillary look-up table.")
   AncilLUT = AncilAll %>%
      group_by(DataID) %>%
      summarise( CntUniqName  = length.unique(Name)
               , CntName      = length(Name)
               , CntOrigUnits = length.unique(OrigUnits)
               , CntStdUnits  = length.unique(StdUnits)
               , Name         = commonest(Name,na.rm=TRUE)
               , OrigValid    = sum(OrigValid,na.rm=TRUE)
               , OrigUnits    = commonest(OrigUnits,na.rm=TRUE)
               , StdValid     = sum(StdValid,na.rm=TRUE)
               , StdUnits     = commonest(StdUnits,na.rm=TRUE) ) %>%
      ungroup() %>%
      arrange(desc(CntName)) %>%
      select(c(DataID,Name,CntUniqName,CntName,OrigValid,OrigUnits,CntOrigUnits,StdValid,StdUnits,CntStdUnits))
   # Create lists with every original unit and standard unit (one row per
   # ID/unit combination, with the number of occurrences).
   TraitOrigUnits = TraitAll %>%
      group_by(TraitID) %>%
      summarise( Name         = commonest(Name,na.rm=TRUE)
               , CntOrigUnits = tabulate(factor(OrigUnits,levels=sort(unique(OrigUnits))))
               , OrigUnits    = sort(unique(OrigUnits)) ) %>%
      ungroup() %>%
      select(TraitID,Name,OrigUnits,CntOrigUnits)
   TraitStdUnits = TraitAll %>%
      group_by(TraitID) %>%
      summarise( Name        = commonest(Name,na.rm=TRUE)
               , CntStdUnits = tabulate(factor(StdUnits,levels=sort(unique(StdUnits))))
               , StdUnits    = sort(unique(StdUnits)) ) %>%
      ungroup() %>%
      select(TraitID,Name,StdUnits,CntStdUnits)
   AncilOrigUnits = AncilAll %>%
      group_by(DataID) %>%
      summarise( Name         = commonest(Name,na.rm=TRUE)
               , CntOrigUnits = tabulate(factor(OrigUnits,levels=sort(unique(OrigUnits))))
               , OrigUnits    = sort(unique(OrigUnits)) ) %>%
      ungroup() %>%
      select(DataID,Name,OrigUnits,CntOrigUnits)
   AncilStdUnits = AncilAll %>%
      group_by(DataID) %>%
      summarise( Name        = commonest(Name,na.rm=TRUE)
               , CntStdUnits = tabulate(factor(StdUnits,levels=sort(unique(StdUnits))))
               , StdUnits    = sort(unique(StdUnits)) ) %>%
      ungroup() %>%
      select(DataID,Name,StdUnits,CntStdUnits)
   # Write CSV files with the look-up tables and unit inventories.
   cat0(" - Write CSV files to ",lookup_path,".")
   dummy = write_csv(x=TraitLUT      ,file=file.path(lookup_path,paste0("Trait_LUT_"      ,use_suffix,".csv")))
   dummy = write_csv(x=TraitOrigUnits,file=file.path(lookup_path,paste0("Trait_OrigUnits_",use_suffix,".csv")))
   dummy = write_csv(x=TraitStdUnits ,file=file.path(lookup_path,paste0("Trait_StdUnits_" ,use_suffix,".csv")))
   dummy = write_csv(x=AncilLUT      ,file=file.path(lookup_path,paste0("Ancil_LUT_"      ,use_suffix,".csv")))
   dummy = write_csv(x=AncilOrigUnits,file=file.path(lookup_path,paste0("Ancil_OrigUnits_",use_suffix,".csv")))
   dummy = write_csv(x=AncilStdUnits ,file=file.path(lookup_path,paste0("Ancil_StdUnits_" ,use_suffix,".csv")))
   # Save look-up tables so future runs can reload them.
   cat0(" - Save look-up tables to ",basename(Rdata_LookUp),".")
   dummy = save( list              = c( "TraitLUT"      , "AncilLUT"
                                      , "TraitOrigUnits", "TraitStdUnits"
                                      , "AncilOrigUnits", "AncilStdUnits"
                                      )#end c
               , file              = Rdata_LookUp
               , compress          = "xz"
               , compression_level = 9
               )#end save
}#end if (reload_lookup && file.exists(Rdata_LookUp) && (! is_dry_run ))
# Exit script in case this is a dry run: without the trait and ancillary
# selection files, the look-up tables are the only possible output, so stop
# here and tell the user how to proceed.
if (is_dry_run){
cat0(" Look-up tables were successfully generated. Make sure to select traits and ancillary data,")
cat0(" then set variables \"ancil_file\" and \"trait_file\" with your selections. ")
stop(" Dry run: only look up tables were generated.")
}#end if (is_dry_run)
We now read in the TRY data base, and incorporate traits data to a tibble object, with one row for each observation.
# Build (or reload) TidyTRY: one row per ObservationID, one column per
# selected trait/ancillary variable, filled by scanning every author CSV file.
if (reload_TidyTRY && file.exists(Rdata_RawTRY)){
# Load previously assembled raw TRY data.
cat0(" + Reload tidy TRY object from ",basename(Rdata_RawTRY),".")
dummy = load(Rdata_RawTRY)
}else{
# Reload TRY_Fix_OrigValue_Str.r as this is often developed as variables are loaded (and crash).
source(file.path(util_path,"TRY_Fix_Replicates_ValueKind.r"),chdir=TRUE)
source(file.path(util_path,"TRY_Fix_OrigValue_Str.r" ),chdir=TRUE)
source(file.path(util_path,"rconstants.r" ),chdir=TRUE)
source(file.path(util_path,"unitlist.r" ),chdir=TRUE)
# Reset all unique files in case we should rewrite them.
cat0(" + Reset unique trait inventory:")
uniq_trait_list = list.files(path=uniq_trait_path,full.names=TRUE)
for (uniq_trait_file in uniq_trait_list){
uniq_trait_base = basename(uniq_trait_file)
cat0(" - Delete file: ",uniq_trait_base,".")
dummy = file.remove(uniq_trait_file)
}#end for (uniq_trait_file in uniq_trait_list)
# Find the list of all files to read.
try_file_list = list.files(author_path,pattern="\\.csv$")
n_try_file_list = length(try_file_list)
# Typically file_first is 1 and file_last the number of files; narrow the
# range only when debugging (the object is then not saved, see bottom).
file_first = 1L
file_last = n_try_file_list
loop_list = seq(from=file_first,to=n_try_file_list,by=1)
# Initialise the tidy TRY object with the identification columns; trait and
# ancillary columns are appended next.
cat0(" + Initialise the tidy TRY object containing traits and ancillary variables.")
TidyTRY = tibble( ObservationID = integer(0L)
, DatasetID = character(0L)
, ScientificName = character(0L)
, Genus = character(0L)
, Family = character(0L)
, Order = character(0L)
, Class = character(0L)
, Phylum = character(0L)
, Count = numeric(0L)
, ValueKind = character(0L)
)#end tibble
# Add one (empty, correctly typed) column per non-supporting ancillary variable.
for (a in sequence(n_try_ancil)){
a_name = try_ancil$Name [a]
a_type = try_ancil$Type [a]
a_supporting = try_ancil$Supporting[a]
# Only variables that are not supporting are added.
if (is.na(a_supporting)){
TidyTRY[[a_name]] = switch( EXPR = a_type
, integer = integer(0L)
, numeric = numeric(0L)
, character = character(0L)
, logical = logical(0L)
, date = as_date(character(0L))
, character(0L)
)#end switch
}#end if (is.na(a_supporting))
}#end for (a in sequence(n_try_ancil))
# Add one column per trait. Supporting traits list their target columns in
# "Supporting" (semicolon-separated); those columns are created instead.
for (z in sequence(n_try_trait)){
z_name = try_trait$Name [z]
z_type = try_trait$Type [z]
z_supporting = try_trait$Supporting[z]
# Check whether to add variable (i.e., not a supporting variable)
if (is.na(z_supporting)){
z_add_list = z_name
}else{
z_add_list = strsplit(x=z_supporting,split=";")[[1]]
}#end if (is.na(z_supporting))
# Add every output column with the trait's declared type.
for (z_add in z_add_list){
TidyTRY[[z_add]] = switch( EXPR = z_type
, integer = integer(0L)
, numeric = numeric(0L)
, character = character(0L)
, logical = logical(0L)
, date = as_date(character(0L))
, character(0L)
)#end switch
}#end for (z_add in z_add_list)
}#end for (z in sequence(n_try_trait))
# Append notes and author columns.
TidyTRY$Notes = character(0L)
TidyTRY$Author = character(0L)
# Track the current number of rows in TidyTRY (0 at this point).
CntTidyTRY = nrow(TidyTRY)
# Go through every file, and retrieve information.
cat0(" + Read the trait data base.")
for ( f in loop_list){
try_base = try_file_list[f]
try_file = file.path(author_path,try_base)
# Load author file (all 28 columns as character; coerced below).
cat0(" - ",sprintf("%4.4i of %4.4i",f,n_try_file_list),": Read file ",try_base,".")
trydb_now = read_csv( file = try_file
, col_types = paste(rep("c",times=28L),collapse="")
)#end read_csv
# Remove invalid characters (UTF8_System is set in the platform chunk above).
trydb_now = trydb_now %>%
mutate_all(~ iconv(.x,to=UTF8_System,sub="byte"))
# Standardise TRY Dataset identifiers. We append TRY to all of them.
trydb_now = trydb_now %>%
mutate( DatasetID = ifelse( test = ! is.na(suppressWarnings(as.integer(DatasetID)))
, yes = sprintf("TRY%4.4i",suppressWarnings(as.integer(DatasetID)))
, no = DatasetID
)#end ifelse
)#end mutate
# Standardise value kind names, and delete unwanted statistics (e.g., min/max) and
# duplicated observations (as flagged by TRY itself)
trydb_now = TRY_Fix_ValueKindName(trydb_now,discard_rm=TRUE,duplicate_rm=TRUE)
# Make ValueKind an ordered factor on the first pass, so pmax() below can
# pick the "highest" value kind when merging duplicated entries.
if (! is.ordered(TidyTRY$ValueKind)){
TidyTRY$ValueKind = ordered(TidyTRY$ValueKind,levels=levels(trydb_now$ValueKindName))
}#end if (! is.ordered(TidyTRY$ValueKind))
# Find every observation that will be entered.
ObsUnique = sort(unique(trydb_now$ObservationID))
ObsAppend = ObsUnique[! (ObsUnique %in% TidyTRY$ObservationID)]
CntObsAppend = length(ObsAppend)
if (CntObsAppend > 0){
# Update number of lines in TidyTRY
OldCntTidyTRY = CntTidyTRY
CntTidyTRY = OldCntTidyTRY + CntObsAppend
NewTidyLines = seq(from=OldCntTidyTRY+1,to=CntTidyTRY)
# Append as many rows as needed for the new observations. Only the first
# column is named explicitly; the remaining columns default to NA.
TidyTRY = TidyTRY %>%
add_row( !! names(.)[1] := rep(NA,times=CntObsAppend)
, .after = nrow(TidyTRY) )
# Assign the observation names for the new lines, and make Notes blank by default.
TidyTRY$ObservationID[NewTidyLines] = as.integer(ObsAppend)
TidyTRY$Notes [NewTidyLines] = ""
}#end if (CntObsAppend > 0)
# Find out which lines have traits and which lines have ancillary information
IsTrait = trydb_now$TraitID %in% try_trait$TraitID
WhichTrait = unique(trydb_now$TraitID[IsTrait])
CntWhichTrait = length(WhichTrait)
# Process each selected trait present in this file.
for (w in sequence(CntWhichTrait)){
TraitIDNow = WhichTrait[w]
z = match(TraitIDNow,try_trait$TraitID)
# This should not happen (WhichTrait was filtered against try_trait)...
if (! is.finite(z)) stop(paste0(" Unrecognised Trait ID: ",TraitIDNow,"."))
# Handy aliases
NameNow = try_trait$Name [z]
DescNow = try_trait$Desc [z]
UseStdUnitNow = try_trait$UseStdUnit[z]
TypeNow = try_trait$Type [z]
# NOTE(review): Add/Mult/Power are evaluated as R expressions from the CSV
# (eval(parse(...))); confirm the CSV is trusted input.
AddNow = eval(parse(text=try_trait$Add [z]))
MultNow = eval(parse(text=try_trait$Mult [z]))
PowerNow = eval(parse(text=try_trait$Power[z]))
UnitNow = try_trait$Unit [z]
SupportingNow = try_trait$Supporting[z]
# Special trait IDs: xylem vulnerability and leaf texture (handled by the fix functions).
XylemNow = as.integer(TraitIDNow) %in% c(719L,3479L)
LTextNow = as.integer(TraitIDNow) %in% c(2L)
#cat0(" ~ Add trait: ",DescNow," (",NameNow,").")
# Check whether to generate list of unique traits
if (gen_unique_trait){
uniq_trait_file = file.path( uniq_trait_path
, paste0(sprintf("%4.4i",as.integer(TraitIDNow)),"_"
,NameNow,"_UniqueValues_",use_suffix,".csv"
)#end paste0
)#end file.path
}else{
uniq_trait_file = NULL
}#end if (gen_unique_trait)
# Select the rows for this trait, and map them onto TidyTRY rows.
sel = trydb_now$TraitID %in% TraitIDNow
idx = match(trydb_now$ObservationID[sel],TidyTRY$ObservationID)
# Separate the names of supporting variables
if (is.na(SupportingNow)){
OutputName = NameNow
}else{
OutputName = strsplit(x=SupportingNow,split=";")[[1]]
}#end if (is.na(SupportingNow))
# Load useful variables that will be needed regardless of whether we use the
# original or standardised data.
DatasetID = trydb_now$DatasetID [sel]
NameOrig = trydb_now$OriglName [sel]
AuthorName = paste(trydb_now$FirstName[sel],trydb_now$LastName[sel])
TraitVKind = trydb_now$ValueKindName[sel]
TraitCount = trydb_now$Replicates [sel]
# Decide which column to use for this trait (Standard or Original)
if (UseStdUnitNow){
# Use standard value, and change type.
TraitOrig = trydb_now$StdValue [sel]
UnitOrig = trydb_now$UnitName [sel]
TraitValue = TraitOrig
TraitValid = ! is.na(TraitValue)
TraitVName = ifelse(test=TraitValid,yes=OutputName,no=NA_character_)
}else{
# Use original value, and change type.
TraitOrig = trydb_now$OrigValueStr [sel]
UnitOrig = trydb_now$OrigUnitStr [sel]
# Apply some edits to ensure as much data as possible can be retrieved
TraitFixed = TRY_FixTrait_OrigValue_Str( TraitID = as.integer(TraitIDNow)
, Type = TypeNow
, TraitOrig = TraitOrig
, UnitOrig = UnitOrig
, NameOrig = NameOrig
, AuthorName = AuthorName
, UniqOutput = uniq_trait_file
, OutputName = OutputName
)#end TRY_FixTrait_OrigValue_Str
# The function retrieves both the value and the expected validity. Keep both
TraitValue = TraitFixed$Value
TraitValid = TraitFixed$Valid
TraitVName = TraitFixed$VName
}#end if (UseStdUnitNow)
# Change variable type. In case the variable is numeric, apply variable transformation too.
if (TypeNow %in% "date"){
TraitValue = as_date(TraitValue)
}else if (TypeNow %in% "numeric"){
TraitValue = AddNow + MultNow * as(TraitValue,TypeNow) ^ PowerNow
}else{
TraitValue = as(TraitValue,TypeNow)
}#end if (TypeNow %in% "date")
# Sanity check: a value flagged valid must not have become NA after coercion.
TraitMess = is.na(TraitValue) & (TraitValid)
MessMessage = paste0("(TID = ",trydb_now$TraitID[sel],"; OrigValue = ",TraitOrig,")")
# Append trait values unless some unexpected exception occurred.
if (any(TraitMess)){
DisplayMess = table(TraitOrig=tolower(TraitOrig),NameOrig=tolower(NameOrig),Trouble=TraitMess)
cat0(" ")
cat0("--------------")
cat0(" FATAL ERROR! ")
cat0("--------------")
cat0(" Trait: ",DescNow,".")
cat0(" Variable name: ",NameNow,".")
cat0(" TraitID: ",TraitIDNow,".")
cat0(" Author: ",paste(sort(unique(AuthorName)),collapse=" "),".")
cat0(" Unexpected exception occurred.")
cat0(" - Type \"print(Trouble)\" for detailed list of bogus entries.")
cat0(" - Type \"DisplayMess\" for quick reference of the original values.")
Trouble = tibble( Line = which(sel)[TraitMess]
, TraitOrig = TraitOrig [TraitMess]
, UnitOrig = UnitOrig [TraitMess]
, NameOrig = NameOrig [TraitMess]
, TraitValue = TraitValue[TraitMess]
, StdValue = (trydb_now$StdValue[sel])[TraitMess]
)#end tibble
stop("Error loading trait data!")
}else{
# Trait data are fine. We update the variable only if the trait variable
# is still NA, and if the filling variable has valid information (we do this
# because many lines of the code may be associated with the same trait ID
# and observation ID, but often only one has meaningful information).
for (OutputNameNow in OutputName){
fill =
( is.na(TidyTRY[[OutputNameNow]][idx]) # Do not overwrite data
& (TraitVName %in% OutputNameNow) # Data must target this column
& (! TraitVKind %in% "Discard")
)#end fill
idx_fill = idx[fill]
TidyTRY[[OutputNameNow]][idx_fill] = TraitValue[fill]
# Append information on number of data replicates and type of the aggregation.
# For count, use the law of minimum, and for value kind, we only update if the
# information has not been added. In both cases, we skip the information if the
# trait is not numeric
fill = fill & (TypeNow %in% "numeric")
idx_fill = idx[fill]
TidyTRY$Count [idx_fill] =
pmin(TidyTRY$Count [idx_fill],TraitCount[fill],na.rm=TRUE)
TidyTRY$ValueKind[idx_fill] =
pmax(TidyTRY$ValueKind[idx_fill],TraitVKind[fill],na.rm=TRUE)
}#end for (OutputNameNow in OutputName)
}#end if (any(TraitMess))
# Append scientific name, author and dataset ID (in case the information hasn't been already added).
TidyTRY$ScientificName[idx] = ifelse( test = is.na(TidyTRY$ScientificName[idx])
& (! is.na(trydb_now$AccSpeciesName[sel]))
, yes = trydb_now$AccSpeciesName[sel]
, no = TidyTRY$ScientificName[idx]
)#end ifelse
TidyTRY$Author [idx] = ifelse( test = is.na(TidyTRY$Author[idx]) & (! is.na(AuthorName))
, yes = AuthorName
, no = TidyTRY$Author[idx]
)#end ifelse
TidyTRY$DatasetID [idx] = ifelse( test = is.na(TidyTRY$DatasetID[idx]) & (! is.na(DatasetID))
, yes = DatasetID
, no = TidyTRY$DatasetID[idx]
)#end ifelse
}#end for (w in sequence(CntWhichTrait))
}#end for ( f in loop_list)
# Save the raw TRY object, but only when every file was processed (partial
# debug runs must not overwrite the complete data set).
save_file = (file_first == 1L) && (file_last == n_try_file_list)
if (save_file){
cat0(" - Save raw TRY trait data base to ",basename(Rdata_RawTRY))
dummy = save( list = c( "TidyTRY","load_ancil")
, file = Rdata_RawTRY
, compress = "xz"
, compression_level = 9
)#end save
}else{
cat0(" - Do not save trait data. Make sure \"loop_list\" starts at 1 and run chunk again.")
stop(" Trait loading set in debug mode. Stop for now.")
}#end if (save_file)
}#end if (reload_TidyTRY && file.exists(Rdata_RawTRY))
In this block we incorporate ancillary data to the data base.
if (load_ancil){
# Reload TRY_Fix_OrigValue_Str.r as this is often developed as variables are loaded (and crash).
source(file.path(util_path,"TRY_Fix_OrigValue_Str.r"),chdir=TRUE)
source(file.path(util_path,"rconstants.r" ),chdir=TRUE)
source(file.path(util_path,"unitlist.r" ),chdir=TRUE)
# Reset all unique files in case we should rewrite them
cat0(" + Reset unique ancillary inventory:")
uniq_ancil_list = list.files(path=uniq_ancil_path,full.names=TRUE)
for (uniq_ancil_file in uniq_ancil_list){
uniq_ancil_base = basename(uniq_ancil_file)
cat0(" - Delete file: ",uniq_ancil_base,".")
dummy = file.remove(uniq_ancil_file)
}#end for (uniq_trait_file in uniq_trait_list)
# Find the list of all files to read.
try_file_list = list.files(author_path,pattern="\\.csv$")
n_try_file_list = length(try_file_list)
# Create the loop for files. Typically file_first should be 1 and file_last the number of files.
# However, when debugging, it may be more efficient to skip the first files. We set a separate
# variable to prevent saving the R object unless we are reading every file.
file_first = 1L
file_last = n_try_file_list
loop_list = seq(from=file_first,to=n_try_file_list,by=1)
# List of data sets that describe treatment (experiment)
TreatmentID = try_ancil$DataID %in% c(238L,308L,319L,324L,363L,490L,4052L,4695L)
TreatmentName = try_ancil$Name[try_ancil$DataID %in% TreatmentID]
# Copy the empty tibble to a Template
CntTidyTRY = nrow(TidyTRY)
# Go through every file, and retrieve information.
cat0(" + Read the ancillary data base.")
for ( f in loop_list){
try_base = try_file_list[f]
try_file = file.path(author_path,try_base)
# Load author file.
cat0(" - ",sprintf("%4.4i of %4.4i",f,n_try_file_list),": Read file ",try_base,".")
trydb_now = read_csv( file = try_file
, col_types = paste(rep("c",times=28L),collapse="")
)#end read_csv
# Remove invalid characters
trydb_now = trydb_now %>%
mutate_all(~ iconv(.x,to=UTF8_System,sub="byte"))
# Standardise value kind names, and delete unwanted statistics (e.g., min/max)
trydb_now = TRY_Fix_ValueKindName(trydb_now,discard_rm=TRUE)
# Create column with author name, which may be useful for fixing ancillary data.
trydb_now$AuthorName = paste(trydb_now$FirstName,trydb_now$LastName)
# Find every observation that will be entered.
ObsUnique = sort(unique(trydb_now$ObservationID))
ObsAppend = ObsUnique[! (ObsUnique %in% TidyTRY$ObservationID)]
CntObsAppend = length(ObsAppend)
if (CntObsAppend > 0){
# Update number of lines in TidyTRY
OldCntTidyTRY = CntTidyTRY
CntTidyTRY = OldCntTidyTRY + CntObsAppend
NewTidyLines = seq(from=OldCntTidyTRY+1,to=CntTidyTRY)
# Append as many rows as needed for the new observations
TidyTRY = TidyTRY %>%
add_row( !! names(.)[1] := rep(NA,times=CntObsAppend)
, .after = nrow(TidyTRY) )
# Assign the observation names for the new lines, and make Notes and Authors
# blank by default.
TidyTRY$ObservationID[NewTidyLines] = as.integer(ObsAppend)
TidyTRY$Notes [NewTidyLines] = ""
TidyTRY$Author [NewTidyLines] = ""
}#end if (CntObsAppend > 0)
# Find out which lines have traits and which lines have ancillary information
IsAncil = is.na(trydb_now$TraitID) & (trydb_now$DataID %in% try_ancil$DataID)
WhichAncil = unique(trydb_now$DataID[IsAncil])
CntWhichAncil = length(WhichAncil)
# Append trait variables in case they are not in the data base.
for (w in sequence(CntWhichAncil)){
DataIDNow = WhichAncil[w]
z = match(DataIDNow,try_ancil$DataID)
# This should not happen...
if (! is.finite(z)) stop(paste0(" Unrecognised Ancillary Data Set ID: ",DataIDNow,"."))
# Handy aliases
NameNow = try_ancil$Name [z]
DescNow = try_ancil$Desc [z]
UseStdUnitNow = try_ancil$UseStdUnit[z]
TypeNow = try_ancil$Type [z]
AddNow = eval(parse(text=try_ancil$Add [z]))
MultNow = eval(parse(text=try_ancil$Mult [z]))
PowerNow = eval(parse(text=try_ancil$Power[z]))
UnitNow = try_ancil$Unit [z]
SupportingNow = try_ancil$Supporting[z]
# cat0(" ~ Add ancillary data: ",DescNow," (",NameNow,").")
# Check whether to generate list of unique traits
if (gen_unique_ancil){
uniq_ancil_file = file.path( uniq_ancil_path
, paste0( sprintf("%4.4i",as.integer(DataIDNow)),"_"
, NameNow,"_UniqueValues_",use_suffix,".csv"
)#end paste0
)#end file.path
}else{
uniq_ancil_file = NULL
}#end if (gen_unique_ancil)
# Append values and units
sel = is.na(trydb_now$TraitID) & (trydb_now$DataID %in% DataIDNow)
idx = match(trydb_now$ObservationID[sel],TidyTRY$ObservationID)
# Set the output name
if (is.na(SupportingNow)){
OutputName = NameNow
}else{
OutputName = SupportingNow
}#end if (is.na(SupportingNow))
# Decide which column to use for this trait (Standard or Original)
if (UseStdUnitNow){
# Use standard value, and change type.
AncilOrig = trydb_now$StdValue [sel]
UnitOrig = trydb_now$UnitName [sel]
# --- Standard-unit branch (tail): values were standardised upstream, so keep them
#     as-is and only record provenance (original column name and author).
NameOrig = trydb_now$OriglName [sel]
AncilAuthor = trydb_now$AuthorName[sel]
AncilValue = AncilOrig
AncilValid = ! is.na(AncilValue)
# The output variable name only applies where the value is usable.
AncilVName = ifelse(test=AncilValid,yes=OutputName,no=NA_character_)
# No cross-links to traits or other ancillary variables in this branch.
AncilTraitID = NA_integer_
AncilTrait = NA_character_
AncilAncilID = NA_integer_
AncilAncil = NA_character_
}else{
# Use original value, and change type.
AncilOrig = trydb_now$OrigValueStr[sel]
UnitOrig = trydb_now$OrigUnitStr [sel]
NameOrig = trydb_now$OriglName [sel]
AncilAuthor = trydb_now$AuthorName [sel]
# Apply some edits to ensure as much data as possible can be retrieved
AncilFixed = TRY_FixAncil_OrigValue_Str( DataID = DataIDNow
, Type = TypeNow
, AncilOrig = AncilOrig
, UnitOrig = UnitOrig
, NameOrig = NameOrig
, AuthorName = AncilAuthor
, UniqOutput = uniq_ancil_file
, OutputName = OutputName
)#end TRY_FixAncil_OrigValue_Str
# The function retrieves both the value and the expected validity. Keep both
AncilValue = AncilFixed$Value
AncilValid = AncilFixed$Valid
AncilVName = AncilFixed$VName
AncilTraitID = AncilFixed$TraitID
AncilTrait = AncilFixed$Trait
AncilAncilID = AncilFixed$AncilID
AncilAncil = AncilFixed$Ancil
# Also return the list of standard values, if present.
# NOTE(review): "AncilLevels" is only (re)assigned in this branch; the standard-unit
# branch above relies on a value from a previous iteration -- confirm this is intended.
AncilLevels = attr(AncilFixed,"Levels")
}#end if (UseStdUnitNow)
# Change variable type. In case the variable is numeric, apply variable transformation too.
if (TypeNow %in% "date"){
AncilValue = as_date(AncilValue)
}else if (TypeNow %in% "numeric"){
# Numeric transformation: Add + Mult * value^Power (coefficients set per variable upstream).
AncilValue = AddNow + MultNow * as(AncilValue,TypeNow) ^ PowerNow
}else{
AncilValue = as(AncilValue,TypeNow)
}#end if (TypeNow %in% "date")
# Sanity check: stop and report any unexpected behaviour.
# A value flagged valid upstream must not become NA after type conversion.
AncilMess = is.na(AncilValue) & (AncilValid)
MessMessage = paste0("(Ancillary = ",trydb_now$DataName[sel],"; OrigValue = ",AncilOrig,")")
# Append trait values unless some unexpected exception occurred.
if (any(AncilMess)){
cat0(" ")
cat0("--------------")
cat0(" FATAL ERROR! ")
cat0("--------------")
cat0(" Ancillary data: ",DescNow ,".")
cat0(" Variable name: " ,NameNow ,".")
cat0(" Data ID: " ,DataIDNow ,".")
cat0(" Author: " ,paste(sort(unique(AncilAuthor)),collapse=" "),".")
cat0(" Unexpected exception occurred. Type \"print(Trouble)\" for error.")
# Table with the offending rows, kept in the workspace for interactive inspection.
Trouble = tibble( Line = which(sel)[AncilMess]
, AncilOrig = AncilOrig [AncilMess]
, UnitOrig = UnitOrig [AncilMess]
, NameOrig = NameOrig [AncilMess]
, AncilValue = AncilValue[AncilMess]
, StdValue = (trydb_now$StdValue[sel])[AncilMess]
)#end tibble
stop("Error loading trait data!")
}else{
# Ancillary data are fine. We update the variable only if the ancillary variable
# is still NA, and only if the filling variable has information (we do this
# because many lines of the code may be associated with the same ancillary variable
# and observation ID, but often only one has meaningful information). The only exception
# is if the variable is an experiment. In this case, we update treatment if there was
# previous (incorrect) indication that this was not an experiment. If we identify
# multiple experiments, we set the value to "other".
# Fill the output variable(s). Treatment-like variables are reconciled with any
# previously stored experiment information; all other variables only fill missing entries.
for (OutputNameNow in OutputName){
  if (OutputNameNow %in% TreatmentName){
    # Number of levels for treatment, and which are not "major" experiments
    CntAncilLevels = length(AncilLevels)
    MajorIndices =
      which(! ( grepl(pattern="control",x=AncilLevels,ignore.case=TRUE)
              | grepl(pattern="minor" ,x=AncilLevels,ignore.case=TRUE) )
      )#end which
    # Match indices for ancillary and reference.
    # Bug fix: the reference must be restricted to the rows being updated ([idx]);
    # the original matched the full TidyTRY column, so ReferIndex (and hence "fill")
    # had a different length than AncilIndex/AncilVName and recycled incorrectly.
    AncilIndex = match(AncilValue ,AncilLevels)
    ReferIndex = match(TidyTRY[[OutputNameNow]][idx],AncilLevels)
    # Update new index in case there are multiple experiments: when both the new and
    # the stored values are "major" experiments, assign the last level (i.e. "other").
    IsMultiple = ( ( AncilIndex %in% MajorIndices )
                 & ( ReferIndex %in% MajorIndices ) )
    AncilValue[IsMultiple] = AncilLevels[CntAncilLevels]
    # Bug fix: the original referenced undefined variable "CntALevels".
    AncilIndex[IsMultiple] = CntAncilLevels
    # Fill in information when new experiment information exists, or if
    # the new information indicates higher levels of experiments.
    fill = ( ( is.na(ReferIndex) | ( AncilIndex %gt% ReferIndex ) )
           & ( AncilVName %in% OutputNameNow )
           )#end fill
  }else{
    # Non-treatment variables: only fill entries that are still missing.
    fill = is.na(TidyTRY[[OutputNameNow]][idx]) & (AncilVName %in% OutputNameNow)
  }#end if (OutputNameNow %in% TreatmentName)
  idx_fill = idx[fill]
  TidyTRY[[OutputNameNow]][idx_fill] = AncilValue[fill]
}#end for (OutputNameNow in OutputName)
}#end if (any(AncilMess))
# Some ancillary data have information that is useful for filling other ancillary data
# Check for those.
WhichOther = sort(unique(AncilAncilID[AncilAncilID %in% try_ancil$DataID]))
CntWhichOther = length(WhichOther)
# Append trait variables in case they are not in the data base.
for (wa in sequence(CntWhichOther)){
AncilIDNow = WhichOther[wa]
za = match(AncilIDNow,try_ancil$DataID)
# This should not happen...
if (! is.finite(za)) stop(paste0(" Unrecognised Data ID: ",AncilIDNow,"."))
# Handy aliases
NameNow = try_ancil$Name [za]
DescNow = try_ancil$Desc [za]
UseStdUnitNow = try_ancil$UseStdUnit[za]
TypeNow = try_ancil$Type [za]
# Transformation coefficients are stored as text (possibly expressions), hence the parse.
AddNow = eval(parse(text=try_ancil$Add [za]))
MultNow = eval(parse(text=try_ancil$Mult [za]))
PowerNow = eval(parse(text=try_ancil$Power[za]))
UnitNow = try_ancil$Unit [za]
SupportingNow = try_ancil$Supporting[za]
cat0(" ~ Update ancillary data: ",DescNow," (",NameNow,").")
# Check whether to generate list of unique traits
# Select data that should be updated.
# Keep only entries whose cross-linked ancillary ID matches the current one.
AncilValue = ifelse( test = AncilAncilID %in% AncilIDNow
, yes = AncilAncil
, no = NA_character_
)#end ifelse
# Change variable type. In case the variable is numeric, apply variable transformation too.
if (TypeNow %in% "date"){
AncilValue = as_date(AncilValue)
}else if (TypeNow %in% "numeric"){
AncilValue = AddNow + MultNow * as(AncilValue,TypeNow) ^ PowerNow
}else{
AncilValue = as(AncilValue,TypeNow)
}#end if (TypeNow %in% "date")
# Update values that are not missing values.
TidyTRY[[NameNow]][idx] = ifelse( test = is.na(AncilValue)
, yes = TidyTRY[[NameNow]][idx]
, no = AncilValue
)#end ifelse
}#end for (wa in sequence(CntWhichOther))
# Some ancillary data have information that is useful for filling in traits.
# Check for those.
WhichTrait = sort(unique(AncilTraitID[AncilTraitID %in% try_trait$TraitID]))
CntWhichTrait = length(WhichTrait)
# Append trait variables in case they are not in the data base.
for (wt in sequence(CntWhichTrait)){
  TraitIDNow = WhichTrait[wt]
  zt = match(TraitIDNow,try_trait$TraitID)
  # This should not happen...
  if (! is.finite(zt)) stop(paste0(" Unrecognised Trait ID: ",TraitIDNow,"."))
  # Handy aliases
  NameNow = try_trait$Name [zt]
  DescNow = try_trait$Desc [zt]
  UseStdUnitNow = try_trait$UseStdUnit[zt]
  TypeNow = try_trait$Type [zt]
  # Transformation coefficients are stored as text (possibly expressions), hence the parse.
  AddNow = eval(parse(text=try_trait$Add [zt]))
  MultNow = eval(parse(text=try_trait$Mult [zt]))
  PowerNow = eval(parse(text=try_trait$Power[zt]))
  UnitNow = try_trait$Unit [zt]
  # Bug fix: the original indexed with undefined "z"; the matched index is "zt".
  SupportingNow = try_trait$Supporting[zt]
  cat0(" ~ Update trait: ",DescNow," (",NameNow,").")
  # Check whether to generate list of unique traits
  # Select data that should be updated (entries cross-linked to the current trait ID).
  TraitValue = ifelse( test = AncilTraitID %in% TraitIDNow
                     , yes = AncilTrait
                     , no = NA_character_
                     )#end ifelse
  # Change variable type. In case the variable is numeric, apply variable transformation too.
  if (TypeNow %in% "date"){
    TraitValue = as_date(TraitValue)
  }else if (TypeNow %in% "numeric"){
    TraitValue = AddNow + MultNow * as(TraitValue,TypeNow) ^ PowerNow
  }else{
    TraitValue = as(TraitValue,TypeNow)
  }#end if (TypeNow %in% "date")
  # Update values that are not missing values.
  TidyTRY[[NameNow]][idx] = ifelse( test = is.na(TraitValue)
                                  , yes = TidyTRY[[NameNow]][idx]
                                  , no = TraitValue
                                  )#end ifelse
}#end for (wt in sequence(CntWhichTrait))
# Append scientific name and author (in case the information hasn't been already added).
TidyTRY$ScientificName[idx] = ifelse( test = is.na(TidyTRY$ScientificName[idx])
& (! is.na(trydb_now$AccSpeciesName[sel]))
, yes = trydb_now$AccSpeciesName[sel]
, no = TidyTRY$ScientificName[idx]
)#end ifelse
TidyTRY$Author [idx] = ifelse( test = is.na(TidyTRY$Author[idx]) & (! is.na(AncilAuthor))
, yes = AncilAuthor
, no = TidyTRY$Author[idx]
)#end ifelse
}#end for (wt in sequence(CntWhichTrait))
}#end for ( f in seq_along(try_file_list))
# Update step so we can skip loading ancillary data again
load_ancil = FALSE
# Reorder variable names for clarity
first_names = c("ObservationID","ScientificName","Genus","Family","Order","Class","Phylum","Author")
other_names = names(TidyTRY)[! names(TidyTRY) %in% first_names]
order_names = c(first_names,other_names)
TidyTRY = TidyTRY %>% select_at(all_of(order_names))
# Save look-up tables
# Only save when the complete list of files was processed (i.e., not a debugging subset).
save_file = (file_first == 1L) && (file_last == n_try_file_list)
if (save_file){
cat0(" - Save raw TRY data base with ancillary information to ",basename(Rdata_RawTRY))
dummy = save( list = c( "TidyTRY","load_ancil")
, file = Rdata_RawTRY
, compress = "xz"
, compression_level = 9
)#end save
}else{
cat0(" - Ancillary files were only partially read (due to debugging).")
cat0(" Check that \"file_first\" is set to 1 and \"file_last\" is set to \"n_try_file_list\".")
stop(" Ended the debugging of ancillary file load. Re-run script with complete file loop settings.")
}#end if (save_file)
}else{
cat0(" + Ancillary data have been already loaded.")
}# if (load_ancil)
Here we append trait data from other data sets (note that these traits must already be standardised to the same units and conventions used above).
# Append trait data from external (pre-standardised) data sets; this stage is
# cached in Rdata_ExtraTRY so it can be skipped on re-runs.
if (file.exists(Rdata_ExtraTRY)){
  # Load data.
  cat0(" + Reload tidy TRY with extra data from ",basename(Rdata_ExtraTRY),".")
  dummy = load(Rdata_ExtraTRY)
}else{
  # Reload TRY_Fix_OrigValue_Str.r as this is often developed as variables are loaded (and crash).
  source(file.path(util_path,"TRY_Fix_Replicates_ValueKind.r"),chdir=TRUE)
  source(file.path(util_path,"TRY_Fix_OrigValue_Str.r"),chdir=TRUE)
  source(file.path(util_path,"rconstants.r"),chdir=TRUE)
  source(file.path(util_path,"unitlist.r"),chdir=TRUE)
  # Reset all unique files in case we should rewrite them
  CntExtra = length(extra_file_list)
  for (e in sequence(CntExtra)){
    ExtraFile = extra_file_list[e]
    cat0(" + Load extra data sets from file ",ExtraFile,".")
    # Ugly temporary solution, to make sure every column is set to character
    HeadNow = read_csv(file=ExtraFile, n_max = 1)
    ColTypes = paste(rep("c",ncol(HeadNow)),collapse="")
    ExtraNow = read_csv(file=ExtraFile,col_types=ColTypes,na=c("","NA"))
    # Standardise the columns for Count and Value Kind Name
    ExtraNow = TRY_Fix_ValueKindName( RawTRY = ExtraNow
                                    , discard_rm = TRUE
                                    , ValueKind = "ValueKind"
                                    , Count = "Count"
                                    )#end TRY_Fix_ValueKindName
    # Extract column names. For now this is hard coded but leaf texture and xylem vulnerability
    # curve at different conductivity losses is provided as a single trait ID in TRY.
    # Suggestions on how to make this more generalisable are welcome.
    ColNames = names(ExtraNow)
    SubNames = ColNames
    SubNames = gsub(pattern="xylem_p[0-9][0-9]",replacement="xylem_pxx" ,x=SubNames)
    SubNames = gsub(pattern="leaf_f_tear" ,replacement="leaf_texture",x=SubNames)
    SubNames = gsub(pattern="leaf_f_punct" ,replacement="leaf_texture",x=SubNames)
    SubNames = gsub(pattern="leaf_t_tear" ,replacement="leaf_texture",x=SubNames)
    SubNames = gsub(pattern="leaf_t_punct" ,replacement="leaf_texture",x=SubNames)
    # Identify columns that are traits then apply variable transformation if needed
    IsTrait = which(SubNames %in% try_trait$Name)
    WhichColumn = ColNames[IsTrait]
    WhichTrait = SubNames[IsTrait]
    CntTraitAppend = length(IsTrait)
    for (w in sequence(CntTraitAppend)){
      ColumnNow = WhichColumn[w]
      TraitNow = WhichTrait [w]
      z = match(TraitNow,try_trait$Name)
      # Handy aliases
      NameNow = try_trait$Name [z]
      DescNow = try_trait$Desc [z]
      TypeNow = try_trait$Type [z]
      cat0(" ~ Add trait: ",DescNow," (",NameNow,"). Variable type: ",TypeNow,".")
      # Change variable type. In case the variable is numeric, apply variable transformation too.
      if (TypeNow %in% "date"){
        ExtraNow[[ColumnNow]] = as_date(ExtraNow[[ColumnNow]])
      }else{
        ExtraNow[[ColumnNow]] = as(ExtraNow[[ColumnNow]],TypeNow)
      }#end if (TypeNow %in% "date")
    }#end for (w in sequence(CntTraitAppend))
    # Identify columns that are ancillary variables then apply variable transformation if needed
    IsAncil = which(SubNames %in% try_ancil$Name)
    WhichColumn = ColNames[IsAncil]
    WhichAncil = SubNames[IsAncil]
    CntAncilAppend = length(IsAncil)
    for (w in sequence(CntAncilAppend)){
      ColumnNow = WhichColumn[w]
      AncilNow = WhichAncil [w]
      z = match(AncilNow,try_ancil$Name)
      # Handy aliases
      NameNow = try_ancil$Name [z]
      DescNow = try_ancil$Desc [z]
      TypeNow = try_ancil$Type [z]
      cat0(" ~ Add ancillary variable: ",DescNow," (",NameNow,"). Variable type: ",TypeNow,".")
      # Change variable type. In case the variable is numeric, apply variable transformation too.
      if (TypeNow %in% "date"){
        ExtraNow[[ColumnNow]] = as_date(ExtraNow[[ColumnNow]])
      }else{
        ExtraNow[[ColumnNow]] = as(ExtraNow[[ColumnNow]],TypeNow)
      }#end if (TypeNow %in% "date")
    }#end for (w in sequence(CntAncilAppend))
    # Create unique observation ID for the extra file, and bind append it to the main structure.
    KeepNames = ColNames[ColNames %in% names(TidyTRY)]
    ExtraNow = ExtraNow %>% select(all_of(KeepNames))
    ExtraNow = ExtraNow %>%
      mutate( ObservationID = max(TidyTRY$ObservationID)+sequence(nrow(ExtraNow)))
    TidyTRY = bind_rows(TidyTRY,ExtraNow)
  }#end for (e in sequence(CntExtra))
  # Update step so we can skip loading extra data again
  load_extra = FALSE
  # Save look-up tables
  cat0(" - Save tidy TRY data base to ",basename(Rdata_ExtraTRY))
  # Bug fix: also save "load_extra" -- the original saved only "load_ancil", so the
  # skip flag set just above was never stored with this stage's cache.
  dummy = save( list = c("TidyTRY","load_ancil","load_extra")
              , file = Rdata_ExtraTRY
              , compress = "xz"
              , compression_level = 9
              )#end save
}# if (file.exists(Rdata_ExtraTRY))
Now we run additional trait harmonisation. Specifically, we standardise scientific names (always use the accepted name), make sure countries/continents are consistent with coordinates, and fix taxonomy to the best extent possible.
# Standardise geographic coordinates and country/continent labels, or reload the
# cached result if this stage already ran.
if (file.exists(Rdata_GeolocationTRY)){
  # A previous run already harmonised geolocation; simply restore it.
  cat0(" + Reload TRY with harmonised geolocation from ",basename(Rdata_GeolocationTRY),".")
  dummy = load(Rdata_GeolocationTRY)
}else{
  cat0(" + Harmonise geographic and taxonomic information.")
  # Load some files which will likely be updated as the code is developed.
  source(file.path(util_path,"TRY_Harmonise_Utils.r"),chdir=TRUE)
  source(file.path(util_path,"rconstants.r"),chdir=TRUE)
  source(file.path(util_path,"unitlist.r"),chdir=TRUE)
  # Coordinates are available only when both DataIDs were loaded (59 = latitude,
  # 60 = longitude).
  IsGeoCoord = all(c(59L,60L) %in% try_ancil$DataID)
  if (IsGeoCoord){
    cat0(" - Harmonise geographic coordinates")
    lon_name = try_ancil$Name[try_ancil$DataID %in% 60L]
    lat_name = try_ancil$Name[try_ancil$DataID %in% 59L]
    # Discard coordinates outside the plausible ranges; which() skips NA entries,
    # and NA coordinates remain NA either way.
    bad_lon = which(! ( TidyTRY[[lon_name]] %wr% c(-180.,360.) ))
    bad_lat = which(! ( TidyTRY[[lat_name]] %wr% c( -90., 90.) ))
    TidyTRY[[lon_name]][bad_lon] = NA_real_
    TidyTRY[[lat_name]][bad_lat] = NA_real_
    # Make sure longitude is defined between -180 and +180 (not 0-360)
    TidyTRY[[lon_name]] = ((TidyTRY[[lon_name]]+180.) %% 360.) - 180.
  }#end if (IsGeoCoord)
  # Country/continent harmonisation requires DataIDs 1412 (country) and 1413 (continent).
  IsLocation = all(c(1412L,1413L) %in% try_ancil$DataID)
  if (IsGeoCoord & IsLocation){
    cat0(" - Harmonise country and continent information.")
    # Select variables for coordinates and location.
    lon_name = try_ancil$Name[try_ancil$DataID %in% 60L ]
    lat_name = try_ancil$Name[try_ancil$DataID %in% 59L ]
    ctry_name = try_ancil$Name[try_ancil$DataID %in% 1412L]
    cont_name = try_ancil$Name[try_ancil$DataID %in% 1413L]
    # Only rows with finite coordinates can be assigned a country/continent.
    sel = is.finite(TidyTRY[[lon_name]]) & is.finite(TidyTRY[[lat_name]])
    GeoHarm = TRY_LonLatToGeoInfo( lon = TidyTRY[[lon_name]][sel]
                                 , lat = TidyTRY[[lat_name]][sel]
                                 , geo_adm1_path = geo_adm1_path
                                 )#end TRY_LonLatToGeoInfo
    TidyTRY[[ctry_name]][sel] = GeoHarm$Country
    TidyTRY[[cont_name]][sel] = GeoHarm$Continent
  }#end if (IsGeoCoord & IsLocation)
  # Save look-up tables
  cat0(" - Save geolocation-corrected TRY data base to ",basename(Rdata_GeolocationTRY))
  dummy = save( list = c("TidyTRY","load_ancil")
              , file = Rdata_GeolocationTRY
              , compress = "xz"
              , compression_level = 9
              )#end save
}#end if (file.exists(Rdata_GeolocationTRY))
We now harmonise some geographical data (e.g., climate, topography) for measurements with geographic information.
# Harmonise gridded geographic data (Koppen climate classes and user-supplied GeoTIFF
# layers) for observations with valid coordinates; cached in Rdata_GeodataTRY.
if (file.exists(Rdata_GeodataTRY)){
  # Load data.
  cat0(" + Reload TRY with harmonised geographic data from ",basename(Rdata_GeodataTRY),".")
  dummy = load(Rdata_GeodataTRY)
}else{
  cat0(" + Harmonise geographic information.")
  # Harmonise climate by using an external Koppen-Geiger map for observations with geolocation.
  IsClimate = any(try_trait$TraitID %in% 825L)
  IsGeoCoord = all(c(59L,60L) %in% try_ancil$DataID)
  if (IsGeoCoord & IsClimate){
    cat0(" - Harmonise climate information.")
    # Select climate variable.
    Climate = try_trait$Name[try_trait$TraitID %in% 825L]
    Lon = try_ancil$Name[try_ancil$DataID %in% 60L ]
    Lat = try_ancil$Name[try_ancil$DataID %in% 59L ]
    # Read the climate data base, masking cells whose class is not in "koppen_class".
    KoppenID = terra::rast(koppen_raster)
    KoppenFun = function(x,koppen_class) ifelse(x %in% koppen_class,x,NA_integer_)
    KoppenID = terra::app(KoppenID,function(x){KoppenFun(x,koppen_class=koppen_class)})
    KoppenID = as.factor(KoppenID)
    # Find observations with geolocation
    GeoIndex = is.finite(TidyTRY[[Lon]]) & is.finite(TidyTRY[[Lat]])
    TRYPoints = vect(TidyTRY[GeoIndex, c(Lon, Lat)], crs="EPSG:4326")
    TRYPoints = project(TRYPoints, crs(KoppenID))
    # Extract data from raster
    KoppenTRY = terra::extract(KoppenID,TRYPoints, cells=FALSE, xy=FALSE, ID=FALSE)
    KoppenTRY = as.integer(KoppenTRY$lyr.1)
    # NOTE(review): indexing names(koppen_class) by the class ID assumes the IDs run
    # 1..length(koppen_class) in order -- confirm against the koppen_class definition.
    KoppenTRY = ifelse( test = KoppenTRY %in% koppen_class
                      , yes = names(koppen_class)[KoppenTRY]
                      , no = NA_character_
                      )#end ifelse
    # Update climate classification
    TidyTRY[[Climate]][GeoIndex] = KoppenTRY
  }#end if (IsGeoCoord & IsClimate)
  # Harmonise additional data by using external maps for observations with geolocation.
  for (o in sequence(nrow(owrite_data))){
    VarIDNow = owrite_data$VarID [o]
    TraitNow = owrite_data$Trait [o]
    GeoTIFFNow = owrite_data$GeoTIFF[o]
    Add0Now = owrite_data$Add0 [o]
    MultNow = owrite_data$Mult [o]
    # Look for the variable and whether or not it is loaded.
    if (TraitNow){
      IsVar = any(try_trait$TraitID %in% VarIDNow)
    }else{
      IsVar = any(try_ancil$DataID %in% VarIDNow)
    }#end if (TraitNow)
    # Replace data if both the geographic coordinates and the variable are loaded.
    if (IsGeoCoord & IsVar){
      if (TraitNow){
        VarName = try_trait$Name[try_trait$TraitID %in% VarIDNow]
        VarDesc = try_trait$Desc[try_trait$TraitID %in% VarIDNow]
      }else{
        VarName = try_ancil$Name[try_ancil$DataID %in% VarIDNow]
        VarDesc = try_ancil$Desc[try_ancil$DataID %in% VarIDNow]
      }#end if (TraitNow)
      Lon = try_ancil$Name[try_ancil$DataID %in% 60L ]
      Lat = try_ancil$Name[try_ancil$DataID %in% 59L ]
      cat0(" - Harmonise ",VarDesc,".")
      # Read the variable data base and apply the linear rescaling (Add0 + Mult * x).
      VarRaster = terra::rast(GeoTIFFNow)
      VarFun = function(x,add0,mult) add0 + mult * x
      VarValue = terra::app(VarRaster,function(x){VarFun(x,add0=Add0Now,mult=MultNow)})
      # Find observations with geolocation
      GeoIndex = is.finite(TidyTRY[[Lon]]) & is.finite(TidyTRY[[Lat]])
      TRYPoints = vect(x=TidyTRY[GeoIndex, c(Lon, Lat)],crs="EPSG:4326")
      # Bug fix: project the points to THIS raster's CRS. The original used
      # crs(KoppenID), which is the wrong grid here and is undefined whenever the
      # Koppen branch above did not run.
      TRYPoints = project(TRYPoints, crs(VarValue))
      # Extract data from raster
      VarTRY = terra::extract(VarValue, TRYPoints, cells=FALSE, xy=FALSE, ID=FALSE)
      VarTRY = VarTRY$lyr.1
      # Update data
      TidyTRY[[VarName]][GeoIndex] = VarTRY
    }#end if (IsGeoCoord & IsVar)
  }#end for (o in sequence(nrow(owrite_data)))
  # Save look-up tables
  cat0(" - Save TRY data base with harmonised geographic information to ",basename(Rdata_GeodataTRY))
  dummy = save( list = c("TidyTRY","load_ancil")
              , file = Rdata_GeodataTRY
              , compress = "xz"
              , compression_level = 9
              )#end save
}#end if (file.exists(Rdata_GeodataTRY))
We now restrict the data to the region of interest and harmonise taxonomy. We restrict the data at this stage because TRY has some data with common name incorrectly defined as scientific name, and harmonising these requires local knowledge.
if (file.exists(Rdata_TaxonomyTRY)){
# Load data.
cat0(" + Reload TRY with harmonised taxonomy from ",basename(Rdata_TaxonomyTRY),".")
dummy = load(Rdata_TaxonomyTRY)
}else{
cat0(" + Harmonise taxonomic information.")
# Keep only the measurements from selected realm before we attempt to harmonise the names.
cat0(" - Subset observations in the region of interest.")
# Create a shorter TRY data base with standard names
# DataID/TraitID codes: 60 = longitude, 59 = latitude, 114 = site, 1412 = country,
# 1413 = continent, 825 = climate (Koppen), 4736 = climate PFT, 193 = biome.
Lon = try_ancil$Name[try_ancil$DataID %in% 60L ]
Lat = try_ancil$Name[try_ancil$DataID %in% 59L ]
Site = try_ancil$Name[try_ancil$DataID %in% 114L ]
Country = try_ancil$Name[try_ancil$DataID %in% 1412L]
Continent = try_ancil$Name[try_ancil$DataID %in% 1413L]
Climate = try_trait$Name[try_trait$TraitID %in% 825L ]
ClimatePFT = try_ancil$Name[try_ancil$DataID %in% 4736L]
Biome = try_ancil$Name[try_ancil$DataID %in% 193L ]
Empty = rep(NA,times=nrow(TidyTRY))
SubsetTRY = tibble( lon = if(length(Lon ) == 0L){Empty}else{TidyTRY[[Lon ]]}
, lat = if(length(Lat ) == 0L){Empty}else{TidyTRY[[Lat ]]}
, site = if(length(Site ) == 0L){Empty}else{TidyTRY[[Site ]]}
, country = if(length(Country ) == 0L){Empty}else{TidyTRY[[Country ]]}
, continent = if(length(Continent ) == 0L){Empty}else{TidyTRY[[Continent ]]}
, climate = if(length(Climate ) == 0L){Empty}else{TidyTRY[[Climate ]]}
, climate_pft = if(length(ClimatePFT) == 0L){Empty}else{TidyTRY[[ClimatePFT]]}
, biome = if(length(Biome ) == 0L){Empty}else{TidyTRY[[Biome ]]}
)#end tibble
# Select the approach subset function based on the realm.
if (use_realm %in% "WestUS"){
KeepObs = SelectWestUS(SubsetTRY)
}else if (use_realm %in% "NeoTropical"){
KeepObs = SelectNeoTropical(SubsetTRY)
}else if (use_realm %in% "AfricaTropical"){
KeepObs = SelectAfricaTropical(SubsetTRY)
}else{
stop(" Pantropical filter still needs to be implemented.")
# KeepObs = SelectPanTropical(TidyTRY)
}#end if (use_realm %in% "WestUS")
# Filter data to remove sites outside the domain of interest
TidyTRY = TidyTRY[KeepObs,,drop=FALSE]
# Harmonise scientific names
cat0(" - Harmonise scientific names.")
# Remove "x " from scientific names
cat0(" ~ Remove \"x\" from scientific names.")
TidyTRY$ScientificName = gsub(pattern="^x ",replacement="",x=TidyTRY$ScientificName)
# Create unique list of scientific names
cat0(" ~ List unique scientific names (as provided in the TRY data base).")
UniqScientific = sort(unique(str_to_sentence(TidyTRY$ScientificName)))
CntUniqTaxon = length(UniqScientific)
# Skeleton look-up table; every taxonomic level starts as NA and is filled below.
UniqTaxon = tibble( ScientificName = rep(NA_character_,times=length(UniqScientific))
, Genus = rep(NA_character_,times=length(UniqScientific))
, Family = rep(NA_character_,times=length(UniqScientific))
, Order = rep(NA_character_,times=length(UniqScientific))
, Class = rep(NA_character_,times=length(UniqScientific))
, Phylum = rep(NA_character_,times=length(UniqScientific))
, GrowthForm = rep(NA_character_,times=length(UniqScientific))
)#end tibble
# If a taxonomic table is provided, use it first.
if (file.exists(taxon_file)){
cat0(" ~ Retrieve taxonomic information from file ",basename(taxon_file),".")
# Read in taxonomic information.
TaxonList = read_csv(file = taxon_file,show_col_types = FALSE)
# Standardise caption
TaxonList$TRYName = str_to_sentence(TaxonList$TRYName )
TaxonList$AcceptedName = str_to_sentence(TaxonList$AcceptedName )
TaxonList$AcceptedFamily = str_to_title (TaxonList$AcceptedFamily)
if ("AcceptedGenus" %in% names(TaxonList)) TaxonList$AcceptedGenus = str_to_title(TaxonList$AcceptedGenus )
if ("AcceptedOrder" %in% names(TaxonList)) TaxonList$AcceptedOrder = str_to_title(TaxonList$AcceptedOrder )
if ("AcceptedClass" %in% names(TaxonList)) TaxonList$AcceptedClass = str_to_title(TaxonList$AcceptedClass )
if ("AcceptedPhylum" %in% names(TaxonList)) TaxonList$AcceptedPhylum = str_to_title(TaxonList$AcceptedPhylum)
if ("GrowthForm" %in% names(TaxonList)) TaxonList$GrowthForm = str_to_title(TaxonList$GrowthForm )
# Search the species in the look-up table.
cat0(" ~ Search species in the look-up table.")
IdxUniq = match(UniqScientific,TaxonList$TRYName)
sel = (! is.na(IdxUniq))
UniqTaxon$ScientificName[sel] = TaxonList$AcceptedName [IdxUniq[sel]]
UniqTaxon$Family [sel] = TaxonList$AcceptedFamily[IdxUniq[sel]]
# Remove variants and sub-species
# (keep only the first two words, i.e., genus and specific epithet).
UniqTaxon = UniqTaxon %>%
mutate(ScientificName = gsub(pattern="(\\w+\\s+\\w+).*",replacement="\\1",x=ScientificName))
# Fill in genus, and populate empty scientific names if column genus is provided.
# If column genus is not provided, extract the first word from the scientific name
if ("AcceptedGenus" %in% names(TaxonList)){
UniqTaxon$Genus[sel] = TaxonList$AcceptedGenus[IdxUniq[sel]]
UniqTaxon = UniqTaxon %>%
mutate(ScientificName = ifelse(test=is.na(ScientificName),yes=Genus,no=ScientificName))
}else{
UniqTaxon = UniqTaxon %>%
mutate(Genus = gsub(pattern="(\\w+).*",replacement="\\1",x=ScientificName))
}#end if ("AcceptedGenus" %in% names(TaxonList))
# Fill in higher taxa
if ("AcceptedOrder" %in% names(TaxonList)) UniqTaxon$Order [sel] = TaxonList$AcceptedOrder [IdxUniq[sel]]
if ("AcceptedClass" %in% names(TaxonList)) UniqTaxon$Class [sel] = TaxonList$AcceptedClass [IdxUniq[sel]]
if ("AcceptedPhylum" %in% names(TaxonList)) UniqTaxon$Phylum[sel] = TaxonList$AcceptedPhylum[IdxUniq[sel]]
}else if (! is.na(taxon_file)){
cat0(" ~ File ",basename(taxon_file)," not found! Retrieve taxonomic information online.")
IdxUniq = rep(NA_integer_,times=length(UniqScientific))
}else{
cat0(" ~ No taxonomic look-up table provided. Retrieve taxonomic information online.")
IdxUniq = rep(NA_integer_,times=length(UniqScientific))
}#end if (file.exists(taxon_file))
# Check for missing species.
UniqMiss = which(is.na(IdxUniq))
CntUniqMiss = length(UniqMiss)
# Decide whether to fill in any missing taxonomic information or stop the run.
if ( ( CntUniqMiss %gt% 0L ) && file.exists(taxon_file) && taxa_must_be_in_file ){
# User does not want to fill in taxonomy online. Stop execution.
cat0("---~---")
cat0(" File ",basename(taxon_file)," does not list all species in data base.")
cat0(" Number of missing species: ",CntUniqMiss,".")
cat0(" To see missing species, check variable \"UniqMiss\".")
cat0(" ")
cat0(" If you prefer to fill in missing species online, set \"taxa_must_be_in_file=FALSE\" or")
cat0(" or ignore taxonomic file by setting \"taxon_file=NA_character_\".")
cat0("---~---")
stop(" Missing species in taxonomic data base.")
}else if (length(UniqMiss) %gt% 0L){
cat0(" ~ Search remaining species online.")
# Query the GBIF backbone for names missing from the look-up table.
UniqCheck = tibble( name = UniqScientific[UniqMiss]
, kingdom = "Plantae"
)#end tibble
UniqResolve = name_backbone_checklist(name_data = UniqCheck)
# Guarantee that all taxonomic rank columns exist before using them below.
MissLevel = c("species","genus","family","order","class","phylum")
MissLevel = MissLevel[! MissLevel %in% names(UniqResolve)]
UniqResolve = UniqResolve %>%
mutate( across(c(MissLevel), ~ NA_character_)) %>%
mutate( acceptedName = ifelse(test=is.na(species),yes=genus,no=species) )
UniqTaxon$ScientificName[UniqMiss] = UniqResolve$acceptedName
UniqTaxon$Genus [UniqMiss] = UniqResolve$genus
UniqTaxon$Family [UniqMiss] = UniqResolve$family
UniqTaxon$Order [UniqMiss] = UniqResolve$order
UniqTaxon$Class [UniqMiss] = UniqResolve$class
UniqTaxon$Phylum [UniqMiss] = UniqResolve$phylum
}#end if (length(UniqMiss) %gt% 0L)
# Find order, class and phylum for each family
MissHighTaxa = ( ( is.na(UniqTaxon$Order) | is.na(UniqTaxon$Class) | is.na(UniqTaxon$Phylum) )
& (! is.na(UniqTaxon$Family) )
)#end MissHighTaxa
UniqFamily = sort(unique(UniqTaxon$Family[MissHighTaxa]))
if (length(UniqFamily) %gt% 0L){
cat0(" ~ Search higher taxonomy information online.")
# NOTE(review): this updates every row whose family is in UniqFamily, not only the
# rows flagged in MissHighTaxa -- confirm the broader overwrite is intended.
UniqIndex = match(UniqTaxon$Family,UniqFamily)
UniqSel = is.finite(UniqIndex)
UniqCheck = tibble( name = UniqFamily, kingdom = "Plantae")
UniqResolve = name_backbone_checklist(name_data = UniqCheck)
MissLevel = c("family","order","class","phylum")
MissLevel = MissLevel[! MissLevel %in% names(UniqResolve)]
UniqResolve = UniqResolve %>%
mutate( across(c(MissLevel), ~ NA_character_))
UniqTaxon$Family[UniqSel] = UniqResolve$family[UniqIndex[UniqSel]]
UniqTaxon$Order [UniqSel] = UniqResolve$order [UniqIndex[UniqSel]]
UniqTaxon$Class [UniqSel] = UniqResolve$class [UniqIndex[UniqSel]]
UniqTaxon$Phylum[UniqSel] = UniqResolve$phylum[UniqIndex[UniqSel]]
}#end if (length(UniqFamily) %gt% 0L)
# Assign growth form in case they are available, then complement with taxa that can be related to growth form
# Note that setting Poaceae to grass is not recommended, as we distinguish grasses from bamboos.
cat0(" ~ Fill in growth form when they are synonym of a taxonomic group")
if ("TaxonList" %in% ls()){
# NOTE(review): "sel" and "IdxUniq" are only set when taxon_file exists in this run;
# if TaxonList lingers from a previous session this may use stale indices -- confirm.
if ("GrowthForm" %in% names(TaxonList)) UniqTaxon$GrowthForm[sel] = TaxonList$GrowthForm[IdxUniq[sel]]
}#end if ("TaxonList" %in% ls())
UniqTaxon$GrowthForm[UniqTaxon$Family %in% "Arecaceae" ] = "Palm"
UniqTaxon$GrowthForm[UniqTaxon$Class %in% "Polypodiopsida" ] = "Fern"
# Before we proceed, we fill in missing species information.
# This is needed to make sure we don't make assumptions about growth form for observations not defined at species level
# Missing ranks receive placeholder names ("Ignoto..."), and missing epithets become
# "spNNNN" counters unique within each Family/Genus group.
UniqTaxon = UniqTaxon %>%
mutate( UniqID = seq_along(ScientificName)
, Phylum = ifelse(test=is.na(Phylum),yes="Ignotophyta",no=Phylum)
, Class = ifelse(test=is.na(Class ),yes="Ignotopsida",no=Class )
, Order = ifelse(test=is.na(Order ),yes="Ignotales" ,no=Order )
, Family = ifelse(test=is.na(Family),yes="Ignotaceae" ,no=Family)
, Genus = ifelse( test = is.na(Genus )
, yes = ifelse( test = Family %in% "Ignotaceae"
, yes = "Ignotum"
, no = paste0("Ignotum_",tolower(Family))
)#end ifelse
, no = Genus
)#end ifelse
, Epithet = word(string=ScientificName,start=2L,end=2L)
) %>% #end mutate
group_by(Family,Genus) %>%
mutate( CntNA = cumsum(is.na(Epithet))
, Epithet = ifelse( test = is.na(Epithet)
, yes = sprintf("sp%4.4i",CntNA)
, no = Epithet
)#end ifelse
, ScientificName = paste(Genus,Epithet)
) %>% #end mutate
ungroup() %>%
select(UniqID,ScientificName,Genus,Family,Order,Class,Phylum,GrowthForm)
# Map resolved names and growth form to the data set
# NOTE(review): the match key must agree with UniqScientific (sentence case); confirm
# TidyTRY$ScientificName casing matches, otherwise match() returns NA rows here.
idx = match(TidyTRY$ScientificName,UniqScientific)
TidyTRY$ScientificName = UniqTaxon$ScientificName[idx]
TidyTRY$Genus = UniqTaxon$Genus [idx]
TidyTRY$Family = UniqTaxon$Family [idx]
TidyTRY$Order = UniqTaxon$Order [idx]
TidyTRY$Class = UniqTaxon$Class [idx]
TidyTRY$Phylum = UniqTaxon$Phylum [idx]
# Refine growth form (TraitID 42) using the taxonomy table and supporting traits
# (woodiness, wood/bark density, DBH, height, Raunkiaer life form).
if (any(try_trait$TraitID %in% 42L)){
  cat0(" - Update growth form.")
  # Build a subset of the tibble for testing tree likelihood.
  GrowthForm = try_trait$Name[try_trait$TraitID %in% 42L]
  Woodiness = try_trait$Name[try_trait$TraitID %in% 38L]
  WoodDens = try_trait$Name[try_trait$TraitID %in% 4L]
  BarkDens = try_trait$Name[try_trait$TraitID %in% 248L]
  DBH = try_trait$Name[try_trait$TraitID %in% 21L]
  Height = try_trait$Name[try_trait$TraitID %in% 18L]
  Raunkiaer = try_trait$Name[try_trait$TraitID %in% 343L]
  EmptyChar = rep(NA_character_,times=nrow(TidyTRY))
  EmptyReal = rep(NA_real_ ,times=nrow(TidyTRY))
  # User-provided growth form has the last word, but only if there is any information.
  TidyTRY[[GrowthForm]] = ifelse( test = is.na(UniqTaxon$GrowthForm[idx])
                                , yes = TidyTRY[[GrowthForm]]
                                , no = UniqTaxon$GrowthForm[idx]
                                )#end ifelse
  # Create a subset of the data set with the standardised column names expected by
  # the growth-form harmoniser; absent traits become all-NA columns.
  SubsetTRY = tibble( ScientificName = TidyTRY$ScientificName
                    , growth_form = TidyTRY[[GrowthForm]]
                    , plant_woodiness = if(length(Woodiness) == 0L){EmptyChar}else{TidyTRY[[Woodiness]]}
                    , wood_dens = if(length(WoodDens ) == 0L){EmptyReal}else{TidyTRY[[WoodDens ]]}
                    , bark_dens = if(length(BarkDens ) == 0L){EmptyReal}else{TidyTRY[[BarkDens ]]}
                    , dbh = if(length(DBH ) == 0L){EmptyReal}else{TidyTRY[[DBH ]]}
                    , height = if(length(Height ) == 0L){EmptyReal}else{TidyTRY[[Height ]]}
                    , raunkiaer = if(length(Raunkiaer) == 0L){EmptyChar}else{TidyTRY[[Raunkiaer]]}
                    )#end tibble
  # Run an additional data harmonisation for growth form, by expanding knowledge
  # from additional traits (e.g. availability of wood/bark density, height above 5 m, etc.)
  # Bug fix: pass SubsetTRY (just built, with the standardised column names read back
  # below) instead of the full TidyTRY; SubsetTRY was otherwise never used.
  SubsetTRY = TRY_Harmonise_GrowthForm(x=SubsetTRY)
  TidyTRY[[GrowthForm]] = SubsetTRY$growth_form
  if(length(Woodiness) > 0L) TidyTRY[[Woodiness]] = SubsetTRY$plant_woodiness
}#end if (any(try_trait$TraitID %in% 42L))
# Save look-up tables
# Cache this stage so re-runs can skip the (slow, partly online) taxonomy harmonisation.
cat0(" - Save the TRY data base with harmonised taxonomy to ",basename(Rdata_TaxonomyTRY))
dummy = save( list = c( "TidyTRY","load_ancil")
, file = Rdata_TaxonomyTRY
, compress = "xz"
, compression_level = 9
)#end save
}#end if (file.exists(Rdata_TaxonomyTRY))
if (file.exists(Rdata_TidyTRY)){
# Load data.
cat0(" + Reload TRY with harmonised geography and taxonomy from ",basename(Rdata_TidyTRY),".")
dummy = load(Rdata_TidyTRY)
}else{
# Load some files which will likely be updated as the code is developed.
source(file.path(util_path,"TRY_Harmonise_Utils.r"),chdir=TRUE)
source(file.path(util_path,"FindBestDistr.r" ),chdir=TRUE)
source(file.path(util_path,"FilterOutliers.r" ),chdir=TRUE)
source(file.path(util_path,"rconstants.r" ),chdir=TRUE)
source(file.path(util_path,"unitlist.r" ),chdir=TRUE)
# Replace generic vulnerability curve variable with the specific ones used in this workflow:
cat0(" - Harmonise list of traits and ancillary variables.")
if ("xylem_pxx" %in% try_trait$Name){
try_xylem = try_trait %>% filter(grepl(pattern="xylem_pxx",x=Name))
xylem_vars = strsplit(x=try_xylem$Supporting,split=";")[[1]]
xylem_pval = as.integer(gsub(pattern="^xylem_p",replacement="",x=xylem_vars))
try_xylem =
try_xylem[rep(1,times=length(xylem_pval)),,drop=FALSE] %>%
mutate( Name = xylem_vars
, Desc = mapply(FUN=gsub,replacement=xylem_pval,x=Desc,MoreArgs=list(pattern="xx"))
, Supporting = NA_character_
)#end mutate
try_trait = bind_rows(try_trait,try_xylem) %>%
arrange(Name) %>%
filter(! (Name %in% "xylem_pxx"))
}#end if ("xylem_pxx" %in% names(try_trait))
# Replace the generic leaf texture variable with the specific ones used in this
# workflow (tear/puncture force and toughness), replicating the generic row once
# per specific variable listed in its "Supporting" field.
if ("leaf_texture" %in% try_trait$Name){
try_ltext = try_trait %>% filter(grepl(pattern="leaf_texture",x=Name))
ltext_vars = strsplit(x=try_ltext$Supporting,split=";")[[1]]
# Flag variables WITHOUT "_f_" (i.e. the "_t_" variables) for unit conversion
# below. NOTE(review): the name suggests "pressure" variables — confirm the
# negation selects the intended subset.
ltext_prss = ! grepl(pattern="_f_",x=ltext_vars)
ltext_desc = rep(NA_character_,times=length(ltext_vars))
ltext_desc[ltext_vars %in% "leaf_f_tear" ] = "Leaf tensile strength"
ltext_desc[ltext_vars %in% "leaf_f_punct"] = "Leaf puncturability strength"
ltext_desc[ltext_vars %in% "leaf_t_tear" ] = "Leaf tensile toughness"
ltext_desc[ltext_vars %in% "leaf_t_punct"] = "Leaf puncturability toughness"
try_ltext =
try_ltext[rep(1,times=length(ltext_vars)),,drop=FALSE] %>%
mutate( Name = ltext_vars
, Desc = ltext_desc
# Convert units for the flagged variables ("nomm" -> "nomm2",
# "knom" -> "mnom2"). The stop() only triggers if ifelse has to
# evaluate the innermost "no" branch, i.e. an unexpected unit.
, Unit = ifelse( test = ltext_prss
, yes = ifelse( test = Unit %in% c("nomm")
, yes = "nomm2"
, no = ifelse( test = Unit %in% "knom"
, yes = "mnom2"
, no = stop("Invalid units for leaf texture!")
)#end ifelse
)#end ifelse
, no = Unit
)#end ifelse
, Supporting = NA_character_
)#end mutate
# Append the specific rows and drop the generic placeholder row.
try_trait = bind_rows(try_trait,try_ltext) %>%
arrange(Name) %>%
filter(! (Name %in% "leaf_texture"))
}#end if ("leaf_texture" %in% try_trait$Name)
# Drop the helper "Supporting" column and any remaining generic rows that still
# carry supporting-variable lists, from both trait and ancillary look-up tables.
if ("Supporting" %in% names(try_trait)){
try_trait = try_trait %>% filter(is.na(Supporting)) %>% select( ! Supporting)
try_ancil = try_ancil %>% filter(is.na(Supporting)) %>% select( ! Supporting)
}#end if ("Supporting" %in% names(try_trait)){
# Standardise the measurement count: records with a missing or non-positive
# count are treated as a single measurement.
cat0(" - Assign single count for data with no information or invalid information.")
CountOrig = TidyTRY$Count
BadCount = is.na(CountOrig) | CountOrig %le% 0
TidyTRY$Count = ifelse(test=BadCount,yes=1,no=CountOrig)
# Run a sanity check on every numeric trait: remove values incompatible with the
# trait's transformation, then remove statistical outliers.
cat0(" - Eliminate unrealistic values.")
WhichTrait = try_trait$TraitID[try_trait$Type %in% "numeric"]
CntWhichTrait = length(WhichTrait)
for (w in sequence(CntWhichTrait)){
TraitIDNow = WhichTrait[w]
z = match(TraitIDNow,try_trait$TraitID)
# Handy aliases
NameNow = try_trait$Name [z]
DescNow = try_trait$Desc [z]
TransNow = try_trait$Trans [z]
cat0(" - Filter data for ",tolower(DescNow),".")
# Identify values that work with the transformation applied to this trait.
if (TransNow %in% c("identity","cbrt")){
# Identity and cube root: any finite value is reasonable
GoodData = is.finite(TidyTRY[[NameNow]])
}else if (TransNow %in% c("sqrt")){
# Square root: non-negative values only
GoodData = TidyTRY[[NameNow]] %ge% 0.
}else if (TransNow %in% c("log")){
# Logarithm: positive values only
GoodData = TidyTRY[[NameNow]] %gt% 0.
}else if (TransNow %in% c("neglog")){
# Negative logarithm: negative values only
GoodData = TidyTRY[[NameNow]] %lt% 0.
}else{
# Bug fix: an unrecognised transformation used to fall through and silently
# reuse "GoodData" left over from the previous loop iteration. Default to
# finite values and warn so the missing case is noticed.
warning(paste0("Unknown transformation (",TransNow,") for trait ",NameNow,"; keeping finite values."))
GoodData = is.finite(TidyTRY[[NameNow]])
}#end if (TransNow %in% c("identity","cbrt"))
# Temporary fix due to typo
if (NameNow %in% "leaf_thick") GoodData = GoodData & (TidyTRY[[NameNow]] %gt% 0.)
# Eliminate trouble making values
TidyTRY[[NameNow]][! GoodData] = NA_real_
# Additionally, we check the global distribution of data to eliminate outliers that
# are likely bogus
TidyTRY[[NameNow]] = FilterOutliers( x = TidyTRY[[NameNow]]
, count = TidyTRY$Count
, CntMin = OutlierCntMin
, zFineMin = zFineMin
, zFineMax = zFineMax
, verbose = TRUE
)#end FilterOutliers
}#end for (w in sequence(CntWhichTrait)){
# Manually remove photosynthesis rates that look modelled rather than measured:
# one contributor reports mass-based Amax equal to exactly 3 across records,
# which is suspiciously constant for observations.
cat0(" - Manually delete suspiciously constant data.")
MassAmax = try_trait$Name[try_trait$TraitID %in% 40L ]
AreaAmax = try_trait$Name[try_trait$TraitID %in% 53L ]
HasMassAmax = length(MassAmax) > 0L
if (HasMassAmax){
# Flag the suspicious records, then blank both mass- and area-based rates.
SuspectTRY = ( TidyTRY$Author %in% "Serge Sheremetev" ) & ( TidyTRY[[MassAmax]] %eq% 3.)
TidyTRY[[MassAmax]][SuspectTRY] = NA_real_
if (length(AreaAmax) > 0L){
TidyTRY[[AreaAmax]][SuspectTRY] = NA_real_
}#end if (length(AreaAmax) > 0L)
}#end if (HasMassAmax)
# Harmonise habitats
cat0(" - Harmonise biome/habitat information.")
if (use_realm %in% "NeoTropical"){
# Retrieve the column names that hold the biome and habitat ancillary data;
# zero-length results mean the variable is absent from this data set.
Biome = try_ancil$Name[try_ancil$DataID %in% 193L]
Habitat = try_ancil$Name[try_ancil$DataID %in% 1863L]
EmptyChar = rep(NA_character_,times=nrow(TidyTRY))
# Build a working subset with standardised lower-case column names, using an
# all-NA column whenever the variable is missing.
SubsetTRY = tibble( biome = if(length(Biome ) == 0L){EmptyChar}else{TidyTRY[[Biome ]]}
, habitat = if(length(Habitat ) == 0L){EmptyChar}else{TidyTRY[[Habitat]]}
)#end tibble
# Harmonise biome classes
SubsetTRY = TRY_Harmonise_NeoTropical_Biome(SubsetTRY)
# Copy the harmonised classes back. Bug fix: the harmonised values live in the
# lower-case "biome" column of SubsetTRY; the previous SubsetTRY[[Biome]]
# indexed with the TRY column name, returning NULL and silently deleting the
# column. Also guard for the case in which the biome variable is absent.
# (Assumes TRY_Harmonise_NeoTropical_Biome keeps the input column names —
# TODO confirm against TRY_Harmonise_Utils.r.)
if (length(Biome) > 0L) TidyTRY[[Biome]] = SubsetTRY$biome
}#end if (use_realm %in% "NeoTropical")
# Load additional info on categorical variables
if (! is.na(addinfo_categ)){
cat0(" - Load additional information on categorical traits.")
# Read the table with extra categorical-trait information per species.
CatExtra = read_csv(file=addinfo_categ,show_col_types = FALSE)
# Map data to the table. Bug fix: match scientific names against the
# "AcceptedName" column (species names), not "AcceptedClass" — the previous
# code appears to have been a typo, which would prevent species from matching.
idx = match(TidyTRY$ScientificName,CatExtra$AcceptedName)
sel = ! is.na(idx)
# List all the traits that might be updated (columns named "TraitID_<id>").
ColExtra = names(CatExtra)
ColTrait = which(grepl(pattern="^TraitID_",x=ColExtra))
# Update each categorical trait for the matched species.
for (ic in ColTrait){
# Select trait
ExtraNow = ColExtra[ic]
TraitIDNow = as.integer(gsub(pattern="^TraitID_",replacement="",x=ExtraNow))
it = match(TraitIDNow,try_trait$TraitID)
TraitNow = try_trait$Name[it]
# Update the trait for all matched species
TidyTRY[[TraitNow]][sel] = CatExtra[[ExtraNow]][idx[sel]]
}#end for (ic in ColTrait)
}#end if (! is.na(addinfo_categ))
# Harmonise area-based and mass-based photosynthesis traits (Amax, Jmax, Rdmax,
# Vcmax), cross-filling one basis from the other using SLA.
PhotoID = c(40L,41L,53L,54L,185L,186L,269L,270L)
if (any(try_trait$TraitID %in% PhotoID)){
cat0(" - Find mass- and area-based photosynthesis traits.")
# Look up the column name for SLA and for each photosynthesis trait; a
# zero-length result means that trait is absent from this data set.
SLA = try_trait$Name[try_trait$TraitID %in% 3117L]
A_Amax = try_trait$Name[try_trait$TraitID %in% 53L]
A_Jmax = try_trait$Name[try_trait$TraitID %in% 269L]
A_Rdmax = try_trait$Name[try_trait$TraitID %in% 54L]
A_Vcmax = try_trait$Name[try_trait$TraitID %in% 186L]
M_Amax = try_trait$Name[try_trait$TraitID %in% 40L]
M_Jmax = try_trait$Name[try_trait$TraitID %in% 270L]
M_Rdmax = try_trait$Name[try_trait$TraitID %in% 41L]
M_Vcmax = try_trait$Name[try_trait$TraitID %in% 185L]
EmptyChar = rep(NA_character_,times=nrow(TidyTRY))
EmptyReal = rep(NA_real_ ,times=nrow(TidyTRY))
# Build a working subset with standardised column names, substituting an
# all-NA column whenever a trait is missing.
SubsetTRY = tibble( ScientificName = TidyTRY$ScientificName
, SLA = if(length(SLA ) == 0L){EmptyReal}else{TidyTRY[[SLA ]]}
, a_amax = if(length(A_Amax ) == 0L){EmptyReal}else{TidyTRY[[A_Amax ]]}
, a_jmax = if(length(A_Jmax ) == 0L){EmptyReal}else{TidyTRY[[A_Jmax ]]}
, a_rdmax = if(length(A_Rdmax) == 0L){EmptyReal}else{TidyTRY[[A_Rdmax]]}
, a_vcmax = if(length(A_Vcmax) == 0L){EmptyReal}else{TidyTRY[[A_Vcmax]]}
, m_amax = if(length(M_Amax ) == 0L){EmptyReal}else{TidyTRY[[M_Amax ]]}
, m_jmax = if(length(M_Jmax ) == 0L){EmptyReal}else{TidyTRY[[M_Jmax ]]}
, m_rdmax = if(length(M_Rdmax) == 0L){EmptyReal}else{TidyTRY[[M_Rdmax]]}
, m_vcmax = if(length(M_Vcmax) == 0L){EmptyReal}else{TidyTRY[[M_Vcmax]]}
)#end tibble
# Harmonise mass- and area-based values (conversion scaled by am_fac).
SubsetTRY = TRY_Harmonise_Photosynthesis(x=SubsetTRY,am_fac=photo_a2m_factor)
# Copy back harmonised photosynthesis data, only for records flagged as
# single (or unknown-kind) measurements.
CopyFine = TidyTRY$ValueKind %in% c("Single","Unknown")
if(length(A_Amax ) != 0L) TidyTRY[[A_Amax ]][CopyFine] = SubsetTRY$a_amax [CopyFine]
if(length(A_Jmax ) != 0L) TidyTRY[[A_Jmax ]][CopyFine] = SubsetTRY$a_jmax [CopyFine]
if(length(A_Rdmax) != 0L) TidyTRY[[A_Rdmax]][CopyFine] = SubsetTRY$a_rdmax[CopyFine]
if(length(A_Vcmax) != 0L) TidyTRY[[A_Vcmax]][CopyFine] = SubsetTRY$a_vcmax[CopyFine]
if(length(M_Amax ) != 0L) TidyTRY[[M_Amax ]][CopyFine] = SubsetTRY$m_amax [CopyFine]
if(length(M_Jmax ) != 0L) TidyTRY[[M_Jmax ]][CopyFine] = SubsetTRY$m_jmax [CopyFine]
if(length(M_Rdmax) != 0L) TidyTRY[[M_Rdmax]][CopyFine] = SubsetTRY$m_rdmax[CopyFine]
if(length(M_Vcmax) != 0L) TidyTRY[[M_Vcmax]][CopyFine] = SubsetTRY$m_vcmax[CopyFine]
}#end if (any(try_trait$TraitID %in% PhotoID))
# Harmonise leaf density and leaf thickness, filling gaps using SLA.
LeafArchID = c(3117L,46L,48L)
if (any(try_trait$TraitID %in% LeafArchID)){
cat0(" - Fill in leaf density and leaf thickness.")
# Look up the column names; zero-length means the trait is absent.
SLA = try_trait$Name[try_trait$TraitID %in% 3117L]
LeafDens = try_trait$Name[try_trait$TraitID %in% 48L]
LeafThick = try_trait$Name[try_trait$TraitID %in% 46L]
EmptyChar = rep(NA_character_,times=nrow(TidyTRY))
EmptyReal = rep(NA_real_ ,times=nrow(TidyTRY))
# Build a working subset with standardised column names (all-NA when absent).
SubsetTRY = tibble( ScientificName = TidyTRY$ScientificName
, SLA = if(length(SLA ) == 0L){EmptyReal}else{TidyTRY[[SLA ]]}
, leaf_dens = if(length(LeafDens ) == 0L){EmptyReal}else{TidyTRY[[LeafDens ]]}
, leaf_thick = if(length(LeafThick) == 0L){EmptyReal}else{TidyTRY[[LeafThick]]}
)#end tibble
# Cross-fill density and thickness (conversion scaled by td_fac).
SubsetTRY = TRY_Harmonise_LeafArchitecture(x=SubsetTRY,td_fac=leaf_thick2dens_factor)
# Copy back harmonised leaf architecture data, only for records flagged as
# single (or unknown-kind) measurements.
CopyFine = TidyTRY$ValueKind %in% c("Single","Unknown")
if(length(LeafDens ) != 0L) TidyTRY[[LeafDens ]][CopyFine] = SubsetTRY$leaf_dens [CopyFine]
if(length(LeafThick) != 0L) TidyTRY[[LeafThick]][CopyFine] = SubsetTRY$leaf_thick[CopyFine]
}#end if (any(try_trait$TraitID %in% LeafArchID))
# Harmonise leaf biomass and leaf area, filling gaps using SLA.
LeafBioAreaID = c(3117L,129L,410L)
if (any(try_trait$TraitID %in% LeafBioAreaID)){
cat0(" - Fill in leaf area and leaf oven-dry mass.")
# Look up the column names; zero-length means the trait is absent.
SLA = try_trait$Name[try_trait$TraitID %in% 3117L]
LeafMass = try_trait$Name[try_trait$TraitID %in% 129L]
LeafArea = try_trait$Name[try_trait$TraitID %in% 410L]
EmptyChar = rep(NA_character_,times=nrow(TidyTRY))
EmptyReal = rep(NA_real_ ,times=nrow(TidyTRY))
# Build a working subset with standardised column names (all-NA when absent).
SubsetTRY = tibble( ScientificName = TidyTRY$ScientificName
, SLA = if(length(SLA ) == 0L){EmptyReal}else{TidyTRY[[SLA ]]}
, bleaf = if(length(LeafMass) == 0L){EmptyReal}else{TidyTRY[[LeafMass]]}
, leaf_area = if(length(LeafArea) == 0L){EmptyReal}else{TidyTRY[[LeafArea]]}
)#end tibble
# Cross-fill leaf mass and leaf area (conversion scaled by am_fac).
SubsetTRY = TRY_Harmonise_LeafBioArea(x=SubsetTRY,am_fac=leaf_area2mass_factor)
# Copy back harmonised leaf mass/area data, only for records flagged as
# single (or unknown-kind) measurements.
CopyFine = TidyTRY$ValueKind %in% c("Single","Unknown")
if(length(LeafMass) != 0L) TidyTRY[[LeafMass]][CopyFine] = SubsetTRY$bleaf [CopyFine]
if(length(LeafArea) != 0L) TidyTRY[[LeafArea]][CopyFine] = SubsetTRY$leaf_area[CopyFine]
}#end if (any(try_trait$TraitID %in% LeafBioAreaID))
# Harmonise crown diameter and crown area, cross-filling one from the other.
# Both traits must be present for this step to be useful.
CrownAreaID = c(20L)
CrownDiamID = c(324L)
if (any(try_trait$TraitID %in% CrownAreaID) && any(try_trait$TraitID %in% CrownDiamID)){
cat0(" - Fill in crown size.")
# Look up the column names for crown area and crown diameter.
CrownArea = try_trait$Name[(try_trait$TraitID %in% 20L)]
CrownDiam = try_trait$Name[(try_trait$TraitID %in% 324L)]
EmptyChar = rep(NA_character_,times=nrow(TidyTRY))
EmptyReal = rep(NA_real_ ,times=nrow(TidyTRY))
# Build a working subset with standardised column names (all-NA when absent).
SubsetTRY = tibble( ScientificName = TidyTRY$ScientificName
, Author = TidyTRY$Author
, crown_area = if(length(CrownArea) == 0L){EmptyReal}else{TidyTRY[[CrownArea]]}
, crown_diam = if(length(CrownDiam) == 0L){EmptyReal}else{TidyTRY[[CrownDiam]]}
)#end tibble
# Cross-fill crown area and diameter (conversion scaled by cd2ca_fac).
SubsetTRY = TRY_Harmonise_CrownSize(x=SubsetTRY,cd2ca_fac=crown_diam2area_factor)
# Copy back harmonised crown size data, only for records flagged as single
# (or unknown-kind) measurements.
CopyFine = TidyTRY$ValueKind %in% c("Single","Unknown")
if(length(CrownArea) != 0L) TidyTRY[[CrownArea]][CopyFine] = SubsetTRY$crown_area[CopyFine]
if(length(CrownDiam) != 0L) TidyTRY[[CrownDiam]][CopyFine] = SubsetTRY$crown_diam[CopyFine]
}#end if (any(try_trait$TraitID %in% CrownAreaID) && any(try_trait$TraitID %in% CrownDiamID))
# Harmonise area-based and mass-based concentrations of leaf components
# (carbon, nitrogen, phosphorus, chlorophyll, carotenoids), cross-filling one
# basis from the other using SLA.
LeafAMConcID = c(13L,14L,15L,50L,51L,164L,413L,570L,809L,491L)
if (any(try_trait$TraitID %in% LeafAMConcID)){
cat0(" - Fill in mass- and area-based leaf component concentrations.")
# Look up the column names; zero-length means the trait is absent.
# Bug fix: area-based leaf carbon is TraitID 570 (consistent with
# LeafAMConcID above); the previous code looked up 510, which is not in the
# list, so area-based carbon was never harmonised.
SLA = try_trait$Name[try_trait$TraitID %in% 3117L]
LeafCarbonM = try_trait$Name[try_trait$TraitID %in% 13L]
LeafCarbonA = try_trait$Name[try_trait$TraitID %in% 570L]
LeafNitroM = try_trait$Name[try_trait$TraitID %in% 14L]
LeafNitroA = try_trait$Name[try_trait$TraitID %in% 50L]
LeafPhosphM = try_trait$Name[try_trait$TraitID %in% 15L]
LeafPhosphA = try_trait$Name[try_trait$TraitID %in% 51L]
LeafChloroM = try_trait$Name[try_trait$TraitID %in% 164L]
LeafChloroA = try_trait$Name[try_trait$TraitID %in% 413L]
LeafCarotM = try_trait$Name[try_trait$TraitID %in% 809L]
LeafCarotA = try_trait$Name[try_trait$TraitID %in% 491L]
EmptyChar = rep(NA_character_,times=nrow(TidyTRY))
EmptyReal = rep(NA_real_ ,times=nrow(TidyTRY))
# Build a working subset with standardised column names (all-NA when absent).
SubsetTRY = tibble( ScientificName = TidyTRY$ScientificName
, SLA = if(length(SLA ) == 0L){EmptyReal}else{TidyTRY[[SLA ]]}
, leaf_m_carbon = if(length(LeafCarbonM) == 0L){EmptyReal}else{TidyTRY[[LeafCarbonM]]}
, leaf_a_carbon = if(length(LeafCarbonA) == 0L){EmptyReal}else{TidyTRY[[LeafCarbonA]]}
, leaf_m_nitro = if(length(LeafNitroM ) == 0L){EmptyReal}else{TidyTRY[[LeafNitroM ]]}
, leaf_a_nitro = if(length(LeafNitroA ) == 0L){EmptyReal}else{TidyTRY[[LeafNitroA ]]}
, leaf_m_phosph = if(length(LeafPhosphM) == 0L){EmptyReal}else{TidyTRY[[LeafPhosphM]]}
, leaf_a_phosph = if(length(LeafPhosphA) == 0L){EmptyReal}else{TidyTRY[[LeafPhosphA]]}
, leaf_m_chloro = if(length(LeafChloroM) == 0L){EmptyReal}else{TidyTRY[[LeafChloroM]]}
, leaf_a_chloro = if(length(LeafChloroA) == 0L){EmptyReal}else{TidyTRY[[LeafChloroA]]}
, leaf_m_carot = if(length(LeafCarotM ) == 0L){EmptyReal}else{TidyTRY[[LeafCarotM ]]}
, leaf_a_carot = if(length(LeafCarotA ) == 0L){EmptyReal}else{TidyTRY[[LeafCarotA ]]}
)#end tibble
# Cross-fill mass- and area-based concentrations (conversion scaled by a2m_fac).
SubsetTRY = TRY_Harmonise_AreaMassConc(x=SubsetTRY,a2m_fac=leaf_ax2mx_factor)
# Copy back harmonised concentration data, only for records flagged as single
# (or unknown-kind) measurements.
CopyFine = TidyTRY$ValueKind %in% c("Single","Unknown")
if(length(LeafCarbonM) != 0L) TidyTRY[[LeafCarbonM]][CopyFine] = SubsetTRY$leaf_m_carbon[CopyFine]
if(length(LeafCarbonA) != 0L) TidyTRY[[LeafCarbonA]][CopyFine] = SubsetTRY$leaf_a_carbon[CopyFine]
if(length(LeafNitroM ) != 0L) TidyTRY[[LeafNitroM ]][CopyFine] = SubsetTRY$leaf_m_nitro [CopyFine]
if(length(LeafNitroA ) != 0L) TidyTRY[[LeafNitroA ]][CopyFine] = SubsetTRY$leaf_a_nitro [CopyFine]
if(length(LeafPhosphM) != 0L) TidyTRY[[LeafPhosphM]][CopyFine] = SubsetTRY$leaf_m_phosph[CopyFine]
if(length(LeafPhosphA) != 0L) TidyTRY[[LeafPhosphA]][CopyFine] = SubsetTRY$leaf_a_phosph[CopyFine]
if(length(LeafChloroM) != 0L) TidyTRY[[LeafChloroM]][CopyFine] = SubsetTRY$leaf_m_chloro[CopyFine]
if(length(LeafChloroA) != 0L) TidyTRY[[LeafChloroA]][CopyFine] = SubsetTRY$leaf_a_chloro[CopyFine]
if(length(LeafCarotM ) != 0L) TidyTRY[[LeafCarotM ]][CopyFine] = SubsetTRY$leaf_m_carot [CopyFine]
if(length(LeafCarotA ) != 0L) TidyTRY[[LeafCarotA ]][CopyFine] = SubsetTRY$leaf_a_carot [CopyFine]
}#end if (any(try_trait$TraitID %in% LeafAMConcID))
# Harmonise leaf carbon, nitrogen and phosphorus ratios (C:N, C:P, N:P),
# cross-filling ratios and element concentrations.
LeafStoichID = c(13L,14L,15L,146L,151L,56L)
if (any(try_trait$TraitID %in% LeafStoichID)){
cat0(" - Fill in leaf stoichiometry.")
# Look up the column names; zero-length means the trait is absent.
LeafC = try_trait$Name[try_trait$TraitID %in% 13L]
LeafN = try_trait$Name[try_trait$TraitID %in% 14L]
LeafP = try_trait$Name[try_trait$TraitID %in% 15L]
LeafC2N = try_trait$Name[try_trait$TraitID %in% 146L]
LeafC2P = try_trait$Name[try_trait$TraitID %in% 151L]
LeafN2P = try_trait$Name[try_trait$TraitID %in% 56L]
EmptyChar = rep(NA_character_,times=nrow(TidyTRY))
EmptyReal = rep(NA_real_ ,times=nrow(TidyTRY))
# Build a working subset with standardised column names (all-NA when absent).
SubsetTRY = tibble( ScientificName = TidyTRY$ScientificName
, Author = TidyTRY$Author
, leaf_c = if(length(LeafC ) == 0L){EmptyReal}else{TidyTRY[[LeafC ]]}
, leaf_n = if(length(LeafN ) == 0L){EmptyReal}else{TidyTRY[[LeafN ]]}
, leaf_p = if(length(LeafP ) == 0L){EmptyReal}else{TidyTRY[[LeafP ]]}
, leaf_c2n = if(length(LeafC2N) == 0L){EmptyReal}else{TidyTRY[[LeafC2N ]]}
, leaf_c2p = if(length(LeafC2P) == 0L){EmptyReal}else{TidyTRY[[LeafC2P ]]}
, leaf_n2p = if(length(LeafN2P) == 0L){EmptyReal}else{TidyTRY[[LeafN2P ]]}
)#end tibble
# Cross-fill stoichiometry (conversions scaled by cn_fac and cp_fac).
SubsetTRY = TRY_Harmonise_LeafStoichiometry(x=SubsetTRY,cn_fac=leaf_c2n_factor,cp_fac=leaf_c2p_factor)
# Copy back harmonised stoichiometry data, only for records flagged as single
# (or unknown-kind) measurements.
CopyFine = TidyTRY$ValueKind %in% c("Single","Unknown")
if(length(LeafC ) != 0L) TidyTRY[[LeafC ]][CopyFine] = SubsetTRY$leaf_c [CopyFine]
if(length(LeafN ) != 0L) TidyTRY[[LeafN ]][CopyFine] = SubsetTRY$leaf_n [CopyFine]
if(length(LeafP ) != 0L) TidyTRY[[LeafP ]][CopyFine] = SubsetTRY$leaf_p [CopyFine]
if(length(LeafC2N) != 0L) TidyTRY[[LeafC2N]][CopyFine] = SubsetTRY$leaf_c2n[CopyFine]
if(length(LeafC2P) != 0L) TidyTRY[[LeafC2P]][CopyFine] = SubsetTRY$leaf_c2p[CopyFine]
if(length(LeafN2P) != 0L) TidyTRY[[LeafN2P]][CopyFine] = SubsetTRY$leaf_n2p[CopyFine]
}#end if (any(try_trait$TraitID %in% LeafStoichID))
# Harmonise leaf texture and thickness, cross-filling texture properties
# using leaf thickness.
LeafTextureID = c(2L)
LeafThickID = c(46L)
# Bug fix: the second condition previously re-tested LeafTextureID (copy-paste
# error); both texture AND thickness traits must be present for this step.
if (any(try_trait$TraitID %in% LeafTextureID) && any(try_trait$TraitID %in% LeafThickID)){
cat0(" - Fill in leaf texture properties.")
# Look up the column names for the four texture variables and thickness;
# zero-length means the trait is absent.
LeafFTear = try_trait$Name[(try_trait$TraitID %in% 2L) & (try_trait$Name %in% "leaf_f_tear" )]
LeafTTear = try_trait$Name[(try_trait$TraitID %in% 2L) & (try_trait$Name %in% "leaf_t_tear" )]
LeafFPunct = try_trait$Name[(try_trait$TraitID %in% 2L) & (try_trait$Name %in% "leaf_f_punct")]
LeafTPunct = try_trait$Name[(try_trait$TraitID %in% 2L) & (try_trait$Name %in% "leaf_t_punct")]
LeafThick = try_trait$Name[try_trait$TraitID %in% 46L]
EmptyChar = rep(NA_character_,times=nrow(TidyTRY))
EmptyReal = rep(NA_real_ ,times=nrow(TidyTRY))
# Build a working subset with standardised column names (all-NA when absent).
SubsetTRY = tibble( ScientificName = TidyTRY$ScientificName
, Author = TidyTRY$Author
, leaf_f_tear = if(length(LeafFTear ) == 0L){EmptyReal}else{TidyTRY[[LeafFTear ]]}
, leaf_t_tear = if(length(LeafTTear ) == 0L){EmptyReal}else{TidyTRY[[LeafTTear ]]}
, leaf_f_punct = if(length(LeafFPunct) == 0L){EmptyReal}else{TidyTRY[[LeafFPunct]]}
, leaf_t_punct = if(length(LeafTPunct) == 0L){EmptyReal}else{TidyTRY[[LeafTPunct]]}
, leaf_thick = if(length(LeafThick ) == 0L){EmptyReal}else{TidyTRY[[LeafThick ]]}
)#end tibble
# Cross-fill texture and thickness (conversion scaled by thtx_fac).
SubsetTRY = TRY_Harmonise_LeafTexture(x=SubsetTRY,thtx_fac=leaf_thick2text_factor)
# Copy back harmonised texture data, only for records flagged as single (or
# unknown-kind) measurements.
CopyFine = TidyTRY$ValueKind %in% c("Single","Unknown")
if(length(LeafFTear ) != 0L) TidyTRY[[LeafFTear ]][CopyFine] = SubsetTRY$leaf_f_tear [CopyFine]
if(length(LeafTTear ) != 0L) TidyTRY[[LeafTTear ]][CopyFine] = SubsetTRY$leaf_t_tear [CopyFine]
if(length(LeafFPunct) != 0L) TidyTRY[[LeafFPunct]][CopyFine] = SubsetTRY$leaf_f_punct[CopyFine]
if(length(LeafTPunct) != 0L) TidyTRY[[LeafTPunct]][CopyFine] = SubsetTRY$leaf_t_punct[CopyFine]
if(length(LeafThick ) != 0L) TidyTRY[[LeafThick ]][CopyFine] = SubsetTRY$leaf_thick [CopyFine]
}#end if (any(try_trait$TraitID %in% LeafTextureID) && any(try_trait$TraitID %in% LeafThickID))
# Exclude data from life forms that should not be analysed, based on the
# "use_lifeclass" setting. NOTE(review): the growth-form classes assume trait 42
# is present (GrowthForm non-empty) — confirm for configurations without it.
cat0(" - Remove data from life forms that are not part of this analysis.")
GrowthForm = try_trait$Name[try_trait$TraitID %in% 42L]
if (use_lifeclass %in% "FlowerTrees"){
KeepTRY = ( ( TidyTRY$Class %in% c("Liliopsida","Magnoliopsida") )
& ( TidyTRY[[GrowthForm]] %in% "Tree" )
)#end KeepTRY
}else if (use_lifeclass %in% "Shrubs"){
KeepTRY = TidyTRY[[GrowthForm]] %in% "Shrub"
}else if (use_lifeclass %in% "Grasses"){
KeepTRY = TidyTRY[[GrowthForm]] %in% "Grass-Herb"
}else if (use_lifeclass %in% "FlowerPlants"){
KeepTRY = TidyTRY$Class %in% c("Liliopsida","Magnoliopsida")
}else if (use_lifeclass %in% "Pinopsida"){
KeepTRY = TidyTRY$Class %in% c("Pinopsida")
}else if (use_lifeclass %in% "SeedPlants"){
KeepTRY = TidyTRY$Class %in% c("Cycadopsida","Ginkgoopsida","Gnetopsida","Liliopsida","Magnoliopsida","Pinopsida")
}else if (use_lifeclass %in% "Plantae"){
KeepTRY = rep(TRUE,nrow(TidyTRY))
}else{
# Fail loudly: an unrecognised class previously left KeepTRY undefined, which
# crashed with an obscure "object not found" error at the subsetting below.
stop(paste0("Invalid setting for use_lifeclass (",use_lifeclass,")."))
}#end if
TidyTRY = TidyTRY[KeepTRY,,drop=FALSE]
# Remove columns that contain no information.
cat0(" - Remove columns with no information.")
KeepTRY = unlist(TidyTRY %>% summarise_all( ~ any(! is.na(.x))))
EraseTRY = names(KeepTRY)[! KeepTRY]
KeepTRY = names(KeepTRY)[KeepTRY]
TidyTRY = TidyTRY %>% select(all_of(KeepTRY))
# Identify rows with at least one trait or ancillary value, and drop the rest.
cat0(" - Remove rows with no information.")
ColNames = names(TidyTRY)
IsData = which(ColNames %in% c(try_trait$Name,try_ancil$Name))
WhichData = ColNames[IsData]
CntData = length(IsData)
AnyData = rep(FALSE,times=nrow(TidyTRY))
for (w in sequence(CntData)){
# Bug fix: iterate over the trait/ancillary columns themselves (WhichData);
# the previous code used ColNames[w], which tested the first CntData columns
# of the tibble (mostly metadata) instead of the data columns.
DataNow = WhichData[w]
AnyData = AnyData | (! is.na(TidyTRY[[DataNow]]))
}#end for (w in sequence(CntData))
# Remove rows with no data.
TidyTRY = TidyTRY[AnyData,,drop=FALSE]
# Exclude traits and ancillary variables that no longer have data
try_trait = try_trait %>% filter(Name %in% names(TidyTRY))
try_ancil = try_ancil %>% filter(Name %in% names(TidyTRY))
# Save look-up tables and the tidy data set for reuse by later runs.
cat0(" - Save tidy TRY data base to ",basename(Rdata_TidyTRY))
dummy = save( list = c( "TidyTRY","load_ancil", "try_trait","try_ancil")
, file = Rdata_TidyTRY
, compress = "xz"
, compression_level = 9
)#end save
}#end if (file.exists(Rdata_TidyTRY))