1 About this notebook

This notebook includes code that I have used in the past and aim to improve.

Packages used plyr, dplyr, minpack.lm, readbulk, and GLDEX. See citation at the end of script.

My GitHub repository for these notebooks


2 Process chamber-based soil GHG flux data

2.1 GHG data from gas chromatography

This script calculates gas (carbon dioxide, methane, nitrous oxide) flux in mg m-2 s-1 from gas chromatography data (concentration in ppm) using a linear regression fit.

#Load libraries
library("plyr")
library("dplyr")
library("minpack.lm")
library("GLDEX")

#Set directory and load data
Path <- setwd("D:/Git/project-3/raw-data/field/")
data <- read.csv("gasvials.csv", header = TRUE)
head(data)
str(data)
## 'data.frame':    498 obs. of  17 variables:
##  $ CH4      : num  1.89 1.89 1.92 1.92 1.93 ...
##  $ N2O      : num  0.331 0.333 0.333 0.332 0.335 ...
##  $ Bottle   : int  101 102 103 104 105 106 107 108 109 110 ...
##  $ dt       : int  0 1 3 5 10 20 0 1 3 5 ...
##  $ dts      : int  0 60 120 300 600 1200 0 60 120 300 ...
##  $ CollarA  : num  0.0337 0.0337 0.0337 0.0337 0.0337 ...
##  $ ChamberV : num  0.0088 0.0088 0.0088 0.0088 0.0088 ...
##  $ File     : chr  "00:49" "00:49" "00:49" "00:49" ...
##  $ Year     : int  2018 2018 2018 2018 2018 2018 2018 2018 2018 2018 ...
##  $ Intensity: chr  "H" "H" "H" "H" ...
##  $ Plot     : chr  "1" "1" "1" "1" ...
##  $ Collar   : chr  "A" "A" "A" "A" ...
##  $ CollarH  : num  4.5 4.5 4.5 4.5 4.5 4.5 4.2 4.2 4.2 4.2 ...
##  $ ST       : num  15.8 15.8 15.8 15.8 15.8 ...
##  $ SM       : num  29.2 29.2 29.2 29.2 29.2 ...
##  $ Pressure : int  1020 1020 1020 1020 1020 1020 1020 1020 1020 1020 ...
##  $ ChamberV1: num  0.00728 0.00728 0.00728 0.00728 0.00728 0.00728 0.00728 0.00728 0.00728 0.00728 ...
#Check and remove missing data by row
colSums(is.na(data))
##       CH4       N2O    Bottle        dt       dts   CollarA  ChamberV      File 
##         0         0         0         0         0         0         0         0 
##      Year Intensity      Plot    Collar   CollarH        ST        SM  Pressure 
##         0         0         0         0         0         0         0         0 
## ChamberV1 
##         0
data <- na.exclude(data)

#Pick specific bottles (closure time) 
#This code selects measurements from bottle 113 to bottle 118
sub <- data %>% filter(Bottle >= 113 & Bottle <= 118)

#Model and calculate standard deviation
m1 <- lm(CH4 ~ dt, data = sub)
sd2 <- sd(m1$fitted.values)

#Plot regression line and observations
plot(CH4 ~ dt, data = sub)
abline(m1, col = "blue", lwd = 2)
abline(m1$coefficients[1] + sd2, m1$coefficients[2])
abline(m1$coefficients[1] - sd2, m1$coefficients[2])

##Remove observations as necessary and refit model with sub1.
#If removal is not necessary, use the rest of script with sub instead of sub1 
#This code removes rows 1 and 4 from the sub dataset
sub1 <- sub[-c(1,4),]
m1 <- lm(CH4 ~ dt, data = sub1)
sd2 <- sd(m1$fitted.values)
plot(CH4 ~ dt, data = sub1)
abline(m1, col = "blue", lwd = 2)
abline(m1$coefficients[1] + sd2, m1$coefficients[2])
abline(m1$coefficients[1] - sd2, m1$coefficients[2])

#Add fitted values to dataframe
sub1$CH4fitted <- fitted(m1)

#Calculate and add fitted diff to dataframe
sub1$CH4diff <- sub1$CH4fitted - 
  lag(sub1$CH4fitted, default = first(sub1$CH4fitted))

Flux calculation
This code calculates CH4 flux in mg CH4 m-2 s-1
Remember to adjust the flux calculation according to the gas respective molar mass, CO2 = 44.01, N2O = 44.013, CH4 = 16.042

flux <- sub1$CH4diff * 16.042 * sub1$Pressure * 
  273.15 / (22.414 * 1013 *(sub1$ST + 273.15)) *
  (sub1$ChamberV / sub1$CollarA) / sub1$dts

#Remove NaN and false zeros
df <- as.data.frame(flux)
df <- na.omit(df)
df <- fun.zero.omit(df)

Write down average flux for chosen bottles
The mean CH4 flux for bottles 113-118 is

mean(df$flux)
## [1] -6.048425e-06

2.2 CO2 data from the Vaisala instrument

This script calculates carbon dioxide flux in mg m-2 s-1 from the Vaisala instrument (concentration in ppm) using a linear regression fit.

#Load libraries
library("readbulk")
library("plyr")
library("dplyr")

#Set directory and load data
raw_data <- read_bulk(directory = "D:/Git/project-3/raw-data/vaisala-raw/",
                      subdirectories = c("01", "02", "03", "04"),
                      extension = ".csv")

#Replace values in "column T.II....C" with the non-NA values from column "TGAS.I....C" creating new column "TempChamber"
raw_data$TempChamber <- ifelse(is.na(raw_data$T.II....C),
                               raw_data$TGAS.I....C, raw_data$T.II....C)

#Rename variables to nicer things
names(raw_data)[names(raw_data) == "X"] <- "Time"
names(raw_data)[names(raw_data) == "CO2.I...ppm"] <- "CO2"
names(raw_data)[names(raw_data) == "RH.II...."] <- "RH"
names(raw_data)[names(raw_data) == "Subdirectory"] <- "Vaisala"

#Remove unnecessary columns 
#This code removes 3rd and 7th columns
raw_data <- select(raw_data, -c(3, 7))

#Grab metadata (pick the correct path)
fl <- dir("D:/Git/project-3/raw-data/vaisala-raw/")[grepl('\\.csv$', dir("D:/Git/project-3/raw-data/vaisala-raw/"))]
fl <- paste("D:/Git/project-3/raw-data/vaisala-raw/", fl, sep = '')
meta <- do.call(rbind,
                lapply(fl, read.csv, as.is = TRUE))

#Merge datasets by file name keeping order
names(meta)[names(meta) == "File"] <- "File"
proc_data <- join(raw_data, meta,
                  type = "inner")

#Remove missing Vaisala data
colSums(is.na(proc_data))
##        Time         CO2          RH     Vaisala        File TempChamber 
##           0           4           0           0           0           3 
##     Closure      Calib1      Calib2        Year   Intensity        Plot 
##           0           0           0           0           0           0 
##      Collar     CollarA    ChamberV    Pressure          ST          SM 
##           0           0           0           0           0           0
proc_data <- na.exclude(proc_data)

#Calibrate CO2 according to Vaisala
proc_data$CO2calib <- with (proc_data, (CO2 - Calib1) / Calib2)

#To calculate dt we need to to convert Time from factor to POSIXct
proc_data$Datetime <- as.POSIXct(paste(proc_data$Time,tz = ""),
                                 format = "%d/%m/%Y %H:%M:%S")

#Pick specific Vaisala and closure (file)  
#This code picked Vaisala 4 and closure 8
sub <- subset(proc_data, Vaisala == "04" & Closure == "8")
sub$dt <- as.numeric(sub$Datetime - sub$Datetime[1])
model1 <- lm(CO2calib ~ dt, data = sub)
plot(CO2calib ~ dt, data = sub)
abline(model1, col = "blue", lwd = 2)

#Add fitted values to dataframe
sub$CO2fitted <- fitted(model1)

#Calculate average CO<sub>2</sub>
flux <- as.numeric(coef(model1)[2] *
                     (sub$ChamberV / sub$CollarA) *
                     (44.01 / 22.414) * (273.15 / (273.15 + sub$Temp)))

The mean flux for Vaisala 4 and enclosure 8 is

mean(flux)
## [1] 0.1005061

3 Analyse soil CO2 emissions

#Load libraries

library("plyr")
library("dplyr")
library("ggplot2")
library("lme4")
library("extrafont")

myFont <- "Segoe UI"
windowsFonts(mono  = myFont)
setwd("D:/Git/project-1/processed-data/")
data <- read.table(file = "Database_fluxes_v0.txt",
                   header = TRUE,
                   dec = ".")

#Renaming all variables
names(data)[names(data) == "fire"] <- "TAF"
names(data)[names(data) == "tsoil"] <- "ST"
names(data)[names(data) == "msoil"] <- "SM"
str(data)
## 'data.frame':    216 obs. of  8 variables:
##  $ TAF   : int  179 179 179 179 179 179 179 179 179 179 ...
##  $ plot  : chr  "a" "a" "b" "b" ...
##  $ collar: int  1 2 3 4 5 6 1 2 3 4 ...
##  $ month : chr  "maio" "maio" "maio" "maio" ...
##  $ site  : chr  "179years" "179years" "179years" "179years" ...
##  $ co2   : num  0.101 0.0669 0.1075 0.0893 0.0846 ...
##  $ ST    : num  9 8.75 11.05 9.4 10.65 ...
##  $ SM    : num  16.4 18.8 14.4 7.8 15.3 ...
#Convert to factor
data$plot<-factor(data$plot)
data$collar<-factor(data$collar)
data$month<-factor(data$month)
data$site<-factor(data$site)

#Remove missing data
colSums(is.na(data))
##    TAF   plot collar  month   site    co2     ST     SM 
##      0      0      0      0      0      0     11     11
data1 <- na.exclude(data)

#Change the unit to g m-2d-1
data1$co2 <- data1$co2 * 86.4

#Fit model with random structure based on model design using REML for final results
full <- lmer(co2 ~ site * ST + site * SM + (1 + ST | month) +
            (1 | plot/collar), REML = T, data = data1)

#Fit model with random structure based on model design using using ML for model selection and model validation 
fullML <- lmer(co2 ~ site * ST + site * SM + (1 + ST | month) +
            (1 | plot/collar), REML = F, data = data1)

#Model selection
knitr::kable(drop1(fullML, test="Chi"))
npar AIC LRT Pr(Chi)
NA 1244.697 NA NA
site:ST 5 1259.229 24.53153 0.0001716
site:SM 5 1252.645 17.94806 0.0030122
#Model validation: residuals vs. fitted
E1 <- resid(fullML)
F1 <- fitted(fullML)
par(mfrow = c(1, 1))
plot(x = F1, 
     y = E1,
     xlab = "Fitted values",
     ylab = "Residuals")
abline(h = 0, lty = 2, col = 1)

#Model estimates
summary(full)
## Linear mixed model fit by REML ['lmerMod']
## Formula: co2 ~ site * ST + site * SM + (1 + ST | month) + (1 | plot/collar)
##    Data: data1
## 
## REML criterion at convergence: 1193.1
## 
## Scaled residuals: 
##     Min      1Q  Median      3Q     Max 
## -3.8354 -0.4295  0.0060  0.3401  4.1420 
## 
## Random effects:
##  Groups      Name        Variance  Std.Dev. Corr
##  collar:plot (Intercept) 6.954e+00 2.63708      
##  plot        (Intercept) 5.603e+00 2.36701      
##  month       (Intercept) 9.916e+00 3.14890      
##              ST          7.267e-04 0.02696  1.00
##  Residual                1.499e+01 3.87172      
## Number of obs: 205, groups:  collar:plot, 36; plot, 18; month, 6
## 
## Fixed effects:
##                 Estimate Std. Error t value
## (Intercept)      1.98012    7.04356   0.281
## site19years     -2.39753    7.59132  -0.316
## site34years      0.41092    7.34670   0.056
## site65years      0.51263    7.57206   0.068
## site76years    -12.51082    8.18110  -1.529
## site8years       7.55761    7.28666   1.037
## ST               0.93067    0.43804   2.125
## SM               0.12848    0.24694   0.520
## site19years:ST   0.26722    0.42491   0.629
## site34years:ST  -0.22885    0.42703  -0.536
## site65years:ST   0.37095    0.43402   0.855
## site76years:ST   0.90376    0.45103   2.004
## site8years:ST   -0.88181    0.39600  -2.227
## site19years:SM  -0.16800    0.26474  -0.635
## site34years:SM  -0.03273    0.27130  -0.121
## site65years:SM  -0.20354    0.28559  -0.713
## site76years:SM   0.26339    0.26947   0.977
## site8years:SM   -0.42289    0.27445  -1.541
## optimizer (nloptwrap) convergence code: 0 (OK)
## boundary (singular) fit: see ?isSingular
#Visualize what the model is doing for soil temperature
MyData <- ddply(data1, 
                .(site), 
                summarize,
                ST = seq(min(ST), 
                            max(ST),
                            length = 30))
MyData["SM"] <- mean(data1$SM)

X <- model.matrix(~site*ST + site*SM,
                  data = MyData)
beta  <- fixef(full)
MyData$mu<-X%*%beta

MyData$SE <- sqrt( diag( X %*% vcov(full) %*% t(X)))
MyData$seup <- MyData$mu + 1.96 * MyData$SE #Lower bound
MyData$selow  <- MyData$mu - 1.96 * MyData$SE #Upper bound
knitr::kable(head(MyData))
site ST SM mu SE seup selow
179years 8.750000 15.83049 12.15737 3.109175 18.25135 6.063387
179years 9.055172 15.83049 12.44139 3.030924 18.38200 6.500775
179years 9.360345 15.83049 12.72540 2.956648 18.52043 6.930370
179years 9.665517 15.83049 13.00942 2.886655 18.66726 7.351572
179years 9.970690 15.83049 13.29343 2.821263 18.82311 7.763755
179years 10.275862 15.83049 13.57745 2.760799 18.98861 8.166279
cols1 <- c("8years" = "#F7BA7A", "19years" = "#C7A66F",
           "34years" = "#979364", "65years" = "#687F5A",
           "76years" = "#386C4F", "179years" = "#085844")

#CO2 vs. Soil temperature
p1 <- ggplot()
p1 <- p1 + geom_point(data = data1, 
                      aes(y = co2,
                          x = ST,
                          col = site,
                          shape = site), size = 1)
p1 <- p1 + xlab(expression('Soil temperature ('*~degree*C*')')) +
  ylab(bquote(''*g ~CO[2]~ m^-2~d^-1*''))
p1 <- p1 + geom_line(data = MyData, 
                     aes(x = ST, 
                         y = mu))
p1 <- p1 + geom_ribbon(data = MyData, 
                       aes(x = ST, 
                           ymax = seup, 
                           ymin = selow),
                       alpha = 0.1)
p1 <- p1 + facet_grid(. ~ factor(site,
                                 levels = c("8years", "19years", "34years",
                                            "65years","76years", "179years"),
                                 labels = c("8 years", "19 years", "34 years",
                                            "65 years","76 years", "179 years")))
p1 <- p1 + theme_bw() +
  theme(text=element_text(size = 12, family="mono")) +
  theme(legend.position = "none")
p1 <- p1 + scale_colour_manual(
  values = cols1,
  aesthetics = "colour")
p1 <- p1+  scale_shape_manual(name = "Time after fire",
                     breaks = c("8years", "19years", "34years",
                                "65years", "76years", "179years"),
                     values = c(8, 17, 15, 4, 7, 16))

#CO2 vs. Soil moisture
MyData <- ddply(data1, 
                .(site), 
                summarize,
                SM = seq(min(SM), 
                            max(SM),
                            length = 30))
MyData["ST"] <- mean(data1$SM)

X <- model.matrix(~site*ST + site*SM,
                  data = MyData)
beta  <- fixef(full)
MyData$mu<-X%*%beta

MyData$SE <- sqrt( diag( X %*% vcov(full) %*% t(X)))
MyData$seup <- MyData$mu + 1.96 * MyData$SE
MyData$selow  <- MyData$mu - 1.96 * MyData$SE
knitr::kable(head(MyData))
site SM ST mu SE seup selow
179years 4.950000 15.83049 17.34905 3.205163 23.63117 11.06693
179years 5.446552 15.83049 17.41285 3.137251 23.56186 11.26384
179years 5.943103 15.83049 17.47665 3.072732 23.49920 11.45409
179years 6.439655 15.83049 17.54044 3.011826 23.44362 11.63726
179years 6.936207 15.83049 17.60424 2.954755 23.39556 11.81292
179years 7.432759 15.83049 17.66804 2.901745 23.35546 11.98062
p11 <- ggplot()
p11 <- p11 + geom_point(data = data1, 
                      aes(y = co2,
                          x = SM,
                          col = site,
                          shape = site), size = 1)
p11 <- p11 + xlab("Soil moisture (%)") +
  ylab(bquote(''*g ~CO[2]~ m^-2~d^-1*''))
p11 <- p11 + geom_line(data = MyData, 
                     aes(x = SM, 
                         y = mu))
p11 <- p11 + geom_ribbon(data = MyData, 
                       aes(x = SM, 
                           ymax = seup, 
                           ymin = selow),
                       alpha = 0.1)
p11 <- p11 + facet_grid(. ~ factor(site,
                                 levels = c("8years", "19years", "34years",
                                            "65years","76years", "179years"),
                                 labels = c("8 years", "19 years", "34 years",
                                            "65 years","76 years", "179 years")))
p11 <- p11 + theme_bw() +
  theme(text=element_text(size = 12, family="mono")) +
  theme(legend.position= "none")
p11 <- p11 + scale_colour_manual(
  values = cols1,
  aesthetics = "colour")
p11 <- p11 +  scale_shape_manual(name = "Time after fire",
                              breaks = c("8years", "19years", "34years",
                                         "65years", "76years", "179years"),
                              values = c(8, 17, 15, 4, 7, 16))
p1

Linear mixed model predictions showing the relationships between soil CO2 efflux and soil temperature (soil moisture was held constant).

p11

Linear mixed model predictions showing the relationships between soil CO2 efflux and soil moisture (soil temperature was held constant).


4 References

Hadley Wickham (2011). The Split-Apply-Combine Strategy for Data Analysis. Journal of Statistical Software, 40(1), 1-29. http://www.jstatsoft.org/v40/i01/.

Timur V. Elzhov, Katharine M. Mullen, Andrej-Nikolai Spiess and Ben Bolker (2016). minpack.lm: R Interface to the Levenberg-Marquardt Nonlinear Least-Squares Algorithm Found in MINPACK, Plus Support for Bounds. R package version 1.2-1. https://CRAN.R-project.org/package=minpack.lm

Hadley Wickham, Romain Francois, Lionel Henry and Kirill Muller (2021). dplyr: A Grammar of Data Manipulation. R package version 1.0.7. https://CRAN.R-project.org/package=dplyr

Kieslich, P. J., & Henninger, F. (2016). Readbulk: An R package for reading and combining multiple data files. https://doi.org/10.5281/zenodo.596649

Steve Su, with contributions from: Diethelm Wuertz, Martin Maechler, Rmetrics core team members for low discrepancy algorithm, Juha Karvanen for L moments codes, Robert King for gld C codes, starship codes, Benjamin Dean for corrections, input in ks.gof code and R core team for histsu function. (2020). GLDEX: Fitting Single and Mixture of Generalised Lambda Distributions (RS and FMKL) using Various Methods. R package version 2.0.0.7. https://CRAN.R-project.org/package=GLDEX