1 About this notebook
This notebook includes code that I have used in the past and aim to improve.
Packages used plyr, dplyr, minpack.lm, readbulk, and GLDEX. See citation at the end of script.
My GitHub repository for these notebooks
2 Process chamber-based soil GHG flux data
2.1 GHG data from gas chromatography
This script calculates gas (carbon dioxide, methane, nitrous oxide) flux in mg m-2 s-1 from gas chromatography data (concentration in ppm) using a linear regression fit.
#Load libraries
library("plyr")
library("dplyr")
library("minpack.lm")
library("GLDEX")
#Set directory and load data
<- setwd("D:/Git/project-3/raw-data/field/")
Path <- read.csv("gasvials.csv", header = TRUE)
data head(data)
str(data)
## 'data.frame': 498 obs. of 17 variables:
## $ CH4 : num 1.89 1.89 1.92 1.92 1.93 ...
## $ N2O : num 0.331 0.333 0.333 0.332 0.335 ...
## $ Bottle : int 101 102 103 104 105 106 107 108 109 110 ...
## $ dt : int 0 1 3 5 10 20 0 1 3 5 ...
## $ dts : int 0 60 120 300 600 1200 0 60 120 300 ...
## $ CollarA : num 0.0337 0.0337 0.0337 0.0337 0.0337 ...
## $ ChamberV : num 0.0088 0.0088 0.0088 0.0088 0.0088 ...
## $ File : chr "00:49" "00:49" "00:49" "00:49" ...
## $ Year : int 2018 2018 2018 2018 2018 2018 2018 2018 2018 2018 ...
## $ Intensity: chr "H" "H" "H" "H" ...
## $ Plot : chr "1" "1" "1" "1" ...
## $ Collar : chr "A" "A" "A" "A" ...
## $ CollarH : num 4.5 4.5 4.5 4.5 4.5 4.5 4.2 4.2 4.2 4.2 ...
## $ ST : num 15.8 15.8 15.8 15.8 15.8 ...
## $ SM : num 29.2 29.2 29.2 29.2 29.2 ...
## $ Pressure : int 1020 1020 1020 1020 1020 1020 1020 1020 1020 1020 ...
## $ ChamberV1: num 0.00728 0.00728 0.00728 0.00728 0.00728 0.00728 0.00728 0.00728 0.00728 0.00728 ...
#Check and remove missing data by row
colSums(is.na(data))
## CH4 N2O Bottle dt dts CollarA ChamberV File
## 0 0 0 0 0 0 0 0
## Year Intensity Plot Collar CollarH ST SM Pressure
## 0 0 0 0 0 0 0 0
## ChamberV1
## 0
<- na.exclude(data)
data
#Pick specific bottles (closure time)
#This code selects measurements from bottle 113 to bottle 118
<- data %>% filter(Bottle >= 113 & Bottle <= 118)
sub
#Model and calculate standard deviation
<- lm(CH4 ~ dt, data = sub)
m1 <- sd(m1$fitted.values)
sd2
#Plot regression line and observations
plot(CH4 ~ dt, data = sub)
abline(m1, col = "blue", lwd = 2)
abline(m1$coefficients[1] + sd2, m1$coefficients[2])
abline(m1$coefficients[1] - sd2, m1$coefficients[2])
##Remove observations as necessary and refit model with sub1.
#If removal is not necessary, use the rest of script with sub instead of sub1
#This code removes rows 1 and 4 from the sub dataset
<- sub[-c(1,4),]
sub1 <- lm(CH4 ~ dt, data = sub1)
m1 <- sd(m1$fitted.values)
sd2 plot(CH4 ~ dt, data = sub1)
abline(m1, col = "blue", lwd = 2)
abline(m1$coefficients[1] + sd2, m1$coefficients[2])
abline(m1$coefficients[1] - sd2, m1$coefficients[2])
#Add fitted values to dataframe
$CH4fitted <- fitted(m1)
sub1
#Calculate and add fitted diff to dataframe
$CH4diff <- sub1$CH4fitted -
sub1lag(sub1$CH4fitted, default = first(sub1$CH4fitted))
Flux calculation
This code calculates CH4 flux in mg CH4 m-2 s-1
Remember to adjust the flux calculation according to the gas respective molar mass, CO2 = 44.01, N2O = 44.013, CH4 = 16.042
<- sub1$CH4diff * 16.042 * sub1$Pressure *
flux 273.15 / (22.414 * 1013 *(sub1$ST + 273.15)) *
$ChamberV / sub1$CollarA) / sub1$dts
(sub1
#Remove NaN and false zeros
<- as.data.frame(flux)
df <- na.omit(df)
df <- fun.zero.omit(df) df
Write down average flux for chosen bottles
The mean CH4 flux for bottles 113-118 is
mean(df$flux)
## [1] -6.048425e-06
2.2 CO2 data from the Vaisala instrument
This script calculates carbon dioxide flux in mg m-2 s-1 from the Vaisala instrument (concentration in ppm) using a linear regression fit.
#Load libraries
library("readbulk")
library("plyr")
library("dplyr")
#Set directory and load data
<- read_bulk(directory = "D:/Git/project-3/raw-data/vaisala-raw/",
raw_data subdirectories = c("01", "02", "03", "04"),
extension = ".csv")
#Replace values in "column T.II....C" with the non-NA values from column "TGAS.I....C" creating new column "TempChamber"
$TempChamber <- ifelse(is.na(raw_data$T.II....C),
raw_data$TGAS.I....C, raw_data$T.II....C)
raw_data
#Rename variables to nicer things
names(raw_data)[names(raw_data) == "X"] <- "Time"
names(raw_data)[names(raw_data) == "CO2.I...ppm"] <- "CO2"
names(raw_data)[names(raw_data) == "RH.II...."] <- "RH"
names(raw_data)[names(raw_data) == "Subdirectory"] <- "Vaisala"
#Remove unnecessary columns
#This code removes 3rd and 7th columns
<- select(raw_data, -c(3, 7))
raw_data
#Grab metadata (pick the correct path)
<- dir("D:/Git/project-3/raw-data/vaisala-raw/")[grepl('\\.csv$', dir("D:/Git/project-3/raw-data/vaisala-raw/"))]
fl <- paste("D:/Git/project-3/raw-data/vaisala-raw/", fl, sep = '')
fl <- do.call(rbind,
meta lapply(fl, read.csv, as.is = TRUE))
#Merge datasets by file name keeping order
names(meta)[names(meta) == "File"] <- "File"
<- join(raw_data, meta,
proc_data type = "inner")
#Remove missing Vaisala data
colSums(is.na(proc_data))
## Time CO2 RH Vaisala File TempChamber
## 0 4 0 0 0 3
## Closure Calib1 Calib2 Year Intensity Plot
## 0 0 0 0 0 0
## Collar CollarA ChamberV Pressure ST SM
## 0 0 0 0 0 0
<- na.exclude(proc_data)
proc_data
#Calibrate CO2 according to Vaisala
$CO2calib <- with (proc_data, (CO2 - Calib1) / Calib2)
proc_data
#To calculate dt we need to to convert Time from factor to POSIXct
$Datetime <- as.POSIXct(paste(proc_data$Time,tz = ""),
proc_dataformat = "%d/%m/%Y %H:%M:%S")
#Pick specific Vaisala and closure (file)
#This code picked Vaisala 4 and closure 8
<- subset(proc_data, Vaisala == "04" & Closure == "8")
sub $dt <- as.numeric(sub$Datetime - sub$Datetime[1])
sub<- lm(CO2calib ~ dt, data = sub)
model1 plot(CO2calib ~ dt, data = sub)
abline(model1, col = "blue", lwd = 2)
#Add fitted values to dataframe
$CO2fitted <- fitted(model1)
sub
#Calculate average CO<sub>2</sub>
<- as.numeric(coef(model1)[2] *
flux $ChamberV / sub$CollarA) *
(sub44.01 / 22.414) * (273.15 / (273.15 + sub$Temp))) (
The mean flux for Vaisala 4 and enclosure 8 is
mean(flux)
## [1] 0.1005061
3 Analyse soil CO2 emissions
#Load libraries
library("plyr")
library("dplyr")
library("ggplot2")
library("lme4")
library("extrafont")
<- "Segoe UI"
myFont windowsFonts(mono = myFont)
setwd("D:/Git/project-1/processed-data/")
<- read.table(file = "Database_fluxes_v0.txt",
data header = TRUE,
dec = ".")
#Renaming all variables
names(data)[names(data) == "fire"] <- "TAF"
names(data)[names(data) == "tsoil"] <- "ST"
names(data)[names(data) == "msoil"] <- "SM"
str(data)
## 'data.frame': 216 obs. of 8 variables:
## $ TAF : int 179 179 179 179 179 179 179 179 179 179 ...
## $ plot : chr "a" "a" "b" "b" ...
## $ collar: int 1 2 3 4 5 6 1 2 3 4 ...
## $ month : chr "maio" "maio" "maio" "maio" ...
## $ site : chr "179years" "179years" "179years" "179years" ...
## $ co2 : num 0.101 0.0669 0.1075 0.0893 0.0846 ...
## $ ST : num 9 8.75 11.05 9.4 10.65 ...
## $ SM : num 16.4 18.8 14.4 7.8 15.3 ...
#Convert to factor
$plot<-factor(data$plot)
data$collar<-factor(data$collar)
data$month<-factor(data$month)
data$site<-factor(data$site)
data
#Remove missing data
colSums(is.na(data))
## TAF plot collar month site co2 ST SM
## 0 0 0 0 0 0 11 11
<- na.exclude(data)
data1
#Change the unit to g m-2d-1
$co2 <- data1$co2 * 86.4
data1
#Fit model with random structure based on model design using REML for final results
<- lmer(co2 ~ site * ST + site * SM + (1 + ST | month) +
full 1 | plot/collar), REML = T, data = data1)
(
#Fit model with random structure based on model design using using ML for model selection and model validation
<- lmer(co2 ~ site * ST + site * SM + (1 + ST | month) +
fullML 1 | plot/collar), REML = F, data = data1)
(
#Model selection
::kable(drop1(fullML, test="Chi")) knitr
npar | AIC | LRT | Pr(Chi) | |
---|---|---|---|---|
NA | 1244.697 | NA | NA | |
site:ST | 5 | 1259.229 | 24.53153 | 0.0001716 |
site:SM | 5 | 1252.645 | 17.94806 | 0.0030122 |
#Model validation: residuals vs. fitted
<- resid(fullML)
E1 <- fitted(fullML)
F1 par(mfrow = c(1, 1))
plot(x = F1,
y = E1,
xlab = "Fitted values",
ylab = "Residuals")
abline(h = 0, lty = 2, col = 1)
#Model estimates
summary(full)
## Linear mixed model fit by REML ['lmerMod']
## Formula: co2 ~ site * ST + site * SM + (1 + ST | month) + (1 | plot/collar)
## Data: data1
##
## REML criterion at convergence: 1193.1
##
## Scaled residuals:
## Min 1Q Median 3Q Max
## -3.8354 -0.4295 0.0060 0.3401 4.1420
##
## Random effects:
## Groups Name Variance Std.Dev. Corr
## collar:plot (Intercept) 6.954e+00 2.63708
## plot (Intercept) 5.603e+00 2.36701
## month (Intercept) 9.916e+00 3.14890
## ST 7.267e-04 0.02696 1.00
## Residual 1.499e+01 3.87172
## Number of obs: 205, groups: collar:plot, 36; plot, 18; month, 6
##
## Fixed effects:
## Estimate Std. Error t value
## (Intercept) 1.98012 7.04356 0.281
## site19years -2.39753 7.59132 -0.316
## site34years 0.41092 7.34670 0.056
## site65years 0.51263 7.57206 0.068
## site76years -12.51082 8.18110 -1.529
## site8years 7.55761 7.28666 1.037
## ST 0.93067 0.43804 2.125
## SM 0.12848 0.24694 0.520
## site19years:ST 0.26722 0.42491 0.629
## site34years:ST -0.22885 0.42703 -0.536
## site65years:ST 0.37095 0.43402 0.855
## site76years:ST 0.90376 0.45103 2.004
## site8years:ST -0.88181 0.39600 -2.227
## site19years:SM -0.16800 0.26474 -0.635
## site34years:SM -0.03273 0.27130 -0.121
## site65years:SM -0.20354 0.28559 -0.713
## site76years:SM 0.26339 0.26947 0.977
## site8years:SM -0.42289 0.27445 -1.541
## optimizer (nloptwrap) convergence code: 0 (OK)
## boundary (singular) fit: see ?isSingular
#Visualize what the model is doing for soil temperature
<- ddply(data1,
MyData
.(site),
summarize,ST = seq(min(ST),
max(ST),
length = 30))
"SM"] <- mean(data1$SM)
MyData[
<- model.matrix(~site*ST + site*SM,
X data = MyData)
<- fixef(full)
beta $mu<-X%*%beta
MyData
$SE <- sqrt( diag( X %*% vcov(full) %*% t(X)))
MyData$seup <- MyData$mu + 1.96 * MyData$SE #Lower bound
MyData$selow <- MyData$mu - 1.96 * MyData$SE #Upper bound
MyData::kable(head(MyData)) knitr
site | ST | SM | mu | SE | seup | selow |
---|---|---|---|---|---|---|
179years | 8.750000 | 15.83049 | 12.15737 | 3.109175 | 18.25135 | 6.063387 |
179years | 9.055172 | 15.83049 | 12.44139 | 3.030924 | 18.38200 | 6.500775 |
179years | 9.360345 | 15.83049 | 12.72540 | 2.956648 | 18.52043 | 6.930370 |
179years | 9.665517 | 15.83049 | 13.00942 | 2.886655 | 18.66726 | 7.351572 |
179years | 9.970690 | 15.83049 | 13.29343 | 2.821263 | 18.82311 | 7.763755 |
179years | 10.275862 | 15.83049 | 13.57745 | 2.760799 | 18.98861 | 8.166279 |
<- c("8years" = "#F7BA7A", "19years" = "#C7A66F",
cols1 "34years" = "#979364", "65years" = "#687F5A",
"76years" = "#386C4F", "179years" = "#085844")
#CO2 vs. Soil temperature
<- ggplot()
p1 <- p1 + geom_point(data = data1,
p1 aes(y = co2,
x = ST,
col = site,
shape = site), size = 1)
<- p1 + xlab(expression('Soil temperature ('*~degree*C*')')) +
p1 ylab(bquote(''*g ~CO[2]~ m^-2~d^-1*''))
<- p1 + geom_line(data = MyData,
p1 aes(x = ST,
y = mu))
<- p1 + geom_ribbon(data = MyData,
p1 aes(x = ST,
ymax = seup,
ymin = selow),
alpha = 0.1)
<- p1 + facet_grid(. ~ factor(site,
p1 levels = c("8years", "19years", "34years",
"65years","76years", "179years"),
labels = c("8 years", "19 years", "34 years",
"65 years","76 years", "179 years")))
<- p1 + theme_bw() +
p1 theme(text=element_text(size = 12, family="mono")) +
theme(legend.position = "none")
<- p1 + scale_colour_manual(
p1 values = cols1,
aesthetics = "colour")
<- p1+ scale_shape_manual(name = "Time after fire",
p1 breaks = c("8years", "19years", "34years",
"65years", "76years", "179years"),
values = c(8, 17, 15, 4, 7, 16))
#CO2 vs. Soil moisture
<- ddply(data1,
MyData
.(site),
summarize,SM = seq(min(SM),
max(SM),
length = 30))
"ST"] <- mean(data1$SM)
MyData[
<- model.matrix(~site*ST + site*SM,
X data = MyData)
<- fixef(full)
beta $mu<-X%*%beta
MyData
$SE <- sqrt( diag( X %*% vcov(full) %*% t(X)))
MyData$seup <- MyData$mu + 1.96 * MyData$SE
MyData$selow <- MyData$mu - 1.96 * MyData$SE
MyData::kable(head(MyData)) knitr
site | SM | ST | mu | SE | seup | selow |
---|---|---|---|---|---|---|
179years | 4.950000 | 15.83049 | 17.34905 | 3.205163 | 23.63117 | 11.06693 |
179years | 5.446552 | 15.83049 | 17.41285 | 3.137251 | 23.56186 | 11.26384 |
179years | 5.943103 | 15.83049 | 17.47665 | 3.072732 | 23.49920 | 11.45409 |
179years | 6.439655 | 15.83049 | 17.54044 | 3.011826 | 23.44362 | 11.63726 |
179years | 6.936207 | 15.83049 | 17.60424 | 2.954755 | 23.39556 | 11.81292 |
179years | 7.432759 | 15.83049 | 17.66804 | 2.901745 | 23.35546 | 11.98062 |
<- ggplot()
p11 <- p11 + geom_point(data = data1,
p11 aes(y = co2,
x = SM,
col = site,
shape = site), size = 1)
<- p11 + xlab("Soil moisture (%)") +
p11 ylab(bquote(''*g ~CO[2]~ m^-2~d^-1*''))
<- p11 + geom_line(data = MyData,
p11 aes(x = SM,
y = mu))
<- p11 + geom_ribbon(data = MyData,
p11 aes(x = SM,
ymax = seup,
ymin = selow),
alpha = 0.1)
<- p11 + facet_grid(. ~ factor(site,
p11 levels = c("8years", "19years", "34years",
"65years","76years", "179years"),
labels = c("8 years", "19 years", "34 years",
"65 years","76 years", "179 years")))
<- p11 + theme_bw() +
p11 theme(text=element_text(size = 12, family="mono")) +
theme(legend.position= "none")
<- p11 + scale_colour_manual(
p11 values = cols1,
aesthetics = "colour")
<- p11 + scale_shape_manual(name = "Time after fire",
p11 breaks = c("8years", "19years", "34years",
"65years", "76years", "179years"),
values = c(8, 17, 15, 4, 7, 16))
p1
Linear mixed model predictions showing the relationships between soil CO2 efflux and soil temperature (soil moisture was held constant).
p11
Linear mixed model predictions showing the relationships between soil CO2 efflux and soil moisture (soil temperature was held constant).
4 References
Hadley Wickham (2011). The Split-Apply-Combine Strategy for Data Analysis. Journal of Statistical Software, 40(1), 1-29. http://www.jstatsoft.org/v40/i01/.
Timur V. Elzhov, Katharine M. Mullen, Andrej-Nikolai Spiess and Ben Bolker (2016). minpack.lm: R Interface to the Levenberg-Marquardt Nonlinear Least-Squares Algorithm Found in MINPACK, Plus Support for Bounds. R package version 1.2-1. https://CRAN.R-project.org/package=minpack.lm
Hadley Wickham, Romain Francois, Lionel Henry and Kirill Muller (2021). dplyr: A Grammar of Data Manipulation. R package version 1.0.7. https://CRAN.R-project.org/package=dplyr
Kieslich, P. J., & Henninger, F. (2016). Readbulk: An R package for reading and combining multiple data files. https://doi.org/10.5281/zenodo.596649
Steve Su, with contributions from: Diethelm Wuertz, Martin Maechler, Rmetrics core team members for low discrepancy algorithm, Juha Karvanen for L moments codes, Robert King for gld C codes, starship codes, Benjamin Dean for corrections, input in ks.gof code and R core team for histsu function. (2020). GLDEX: Fitting Single and Mixture of Generalised Lambda Distributions (RS and FMKL) using Various Methods. R package version 2.0.0.7. https://CRAN.R-project.org/package=GLDEX