1 About this notebook
This notebook includes code that I have used in the past and aim to improve.
Packages used plyr, dplyr, minpack.lm, readbulk, and GLDEX. See citation at the end of script.
My GitHub repository for these notebooks
2 Process chamber-based soil GHG flux data
2.1 GHG data from gas chromatography
This script calculates gas (carbon dioxide, methane, nitrous oxide) flux in mg m-2 s-1 from gas chromatography data (concentration in ppm) using a linear regression fit.
#Load libraries
library("plyr")
library("dplyr")
library("minpack.lm")
library("GLDEX")
#Set directory and load data
<- setwd("D:/Git/project-3/raw-data/field/")
Path <- read.csv("gasvials.csv", header = TRUE)
data head(data)
str(data)
## 'data.frame': 498 obs. of 17 variables:
## $ CH4 : num 1.89 1.89 1.92 1.92 1.93 ...
## $ N2O : num 0.331 0.333 0.333 0.332 0.335 ...
## $ Bottle : int 101 102 103 104 105 106 107 108 109 110 ...
## $ dt : int 0 1 3 5 10 20 0 1 3 5 ...
## $ dts : int 0 60 120 300 600 1200 0 60 120 300 ...
## $ CollarA : num 0.0337 0.0337 0.0337 0.0337 0.0337 ...
## $ ChamberV : num 0.0088 0.0088 0.0088 0.0088 0.0088 ...
## $ File : chr "00:49" "00:49" "00:49" "00:49" ...
## $ Year : int 2018 2018 2018 2018 2018 2018 2018 2018 2018 2018 ...
## $ Intensity: chr "H" "H" "H" "H" ...
## $ Plot : chr "1" "1" "1" "1" ...
## $ Collar : chr "A" "A" "A" "A" ...
## $ CollarH : num 4.5 4.5 4.5 4.5 4.5 4.5 4.2 4.2 4.2 4.2 ...
## $ ST : num 15.8 15.8 15.8 15.8 15.8 ...
## $ SM : num 29.2 29.2 29.2 29.2 29.2 ...
## $ Pressure : int 1020 1020 1020 1020 1020 1020 1020 1020 1020 1020 ...
## $ ChamberV1: num 0.00728 0.00728 0.00728 0.00728 0.00728 0.00728 0.00728 0.00728 0.00728 0.00728 ...
#Check and remove missing data by row
colSums(is.na(data))
## CH4 N2O Bottle dt dts CollarA ChamberV File
## 0 0 0 0 0 0 0 0
## Year Intensity Plot Collar CollarH ST SM Pressure
## 0 0 0 0 0 0 0 0
## ChamberV1
## 0
<- na.exclude(data)
data
#Pick specific bottles (closure time)
#This code selects measurements from bottle 113 to bottle 118
<- data %>% filter(Bottle >= 113 & Bottle <= 118)
sub
#Model and calculate standard deviation
<- lm(CH4 ~ dt, data = sub)
m1 <- sd(m1$fitted.values)
sd2
#Plot regression line and observations
plot(CH4 ~ dt, data = sub)
abline(m1, col = "blue", lwd = 2)
abline(m1$coefficients[1] + sd2, m1$coefficients[2])
abline(m1$coefficients[1] - sd2, m1$coefficients[2])
##Remove observations as necessary and refit model with sub1.
#If removal is not necessary, use the rest of script with sub instead of sub1
#This code removes rows 1 and 4 from the sub dataset
<- sub[-c(1,4),]
sub1 <- lm(CH4 ~ dt, data = sub1)
m1 <- sd(m1$fitted.values)
sd2 plot(CH4 ~ dt, data = sub1)
abline(m1, col = "blue", lwd = 2)
abline(m1$coefficients[1] + sd2, m1$coefficients[2])
abline(m1$coefficients[1] - sd2, m1$coefficients[2])
#Add fitted values to dataframe
$CH4fitted <- fitted(m1)
sub1
#Calculate and add fitted diff to dataframe
$CH4diff <- sub1$CH4fitted -
sub1lag(sub1$CH4fitted, default = first(sub1$CH4fitted))
Flux calculation
This code calculates CH4 flux in mg CH4 m-2 s-1
Remember to adjust the flux calculation according to the gas respective molar mass, CO2 = 44.01, N2O = 44.013, CH4 = 16.042
<- sub1$CH4diff * 16.042 * sub1$Pressure *
flux 273.15 / (22.414 * 1013 *(sub1$ST + 273.15)) *
$ChamberV / sub1$CollarA) / sub1$dts
(sub1
#Remove NaN and false zeros
<- as.data.frame(flux)
df <- na.omit(df)
df <- fun.zero.omit(df) df
Write down average flux for chosen bottles
The mean CH4 flux for bottles 113-118 is
mean(df$flux)
## [1] -6.048425e-06
2.2 CO2 data from the Vaisala instrument
This script calculates carbon dioxide flux in mg m-2 s-1 from the Vaisala instrument (concentration in ppm) using a linear regression fit.
#Load libraries
library("readbulk")
library("plyr")
library("dplyr")
#Set directory and load data
<- read_bulk(directory = "D:/Git/project-3/raw-data/vaisala-raw/",
raw_data subdirectories = c("01", "02", "03", "04"),
extension = ".csv")
#Replace values in "column T.II....C" with the non-NA values from column "TGAS.I....C" creating new column "TempChamber"
$TempChamber <- ifelse(is.na(raw_data$T.II....C),
raw_data$TGAS.I....C, raw_data$T.II....C)
raw_data
#Rename variables to nicer things
names(raw_data)[names(raw_data) == "X"] <- "Time"
names(raw_data)[names(raw_data) == "CO2.I...ppm"] <- "CO2"
names(raw_data)[names(raw_data) == "RH.II...."] <- "RH"
names(raw_data)[names(raw_data) == "Subdirectory"] <- "Vaisala"
#Remove unnecessary columns
#This code removes 3rd and 7th columns
<- select(raw_data, -c(3, 7))
raw_data
#Grab metadata (pick the correct path)
<- dir("D:/Git/project-3/raw-data/vaisala-raw/")[grepl('\\.csv$', dir("D:/Git/project-3/raw-data/vaisala-raw/"))]
fl <- paste("D:/Git/project-3/raw-data/vaisala-raw/", fl, sep = '')
fl <- do.call(rbind,
meta lapply(fl, read.csv, as.is = TRUE))
#Merge datasets by file name keeping order
names(meta)[names(meta) == "File"] <- "File"
<- join(raw_data, meta,
proc_data type = "inner")
#Remove missing Vaisala data
colSums(is.na(proc_data))
## Time CO2 RH Vaisala File TempChamber
## 0 4 0 0 0 3
## Closure Calib1 Calib2 Year Intensity Plot
## 0 0 0 0 0 0
## Collar CollarA ChamberV Pressure ST SM
## 0 0 0 0 0 0
<- na.exclude(proc_data)
proc_data
#Calibrate CO2 according to Vaisala
$CO2calib <- with (proc_data, (CO2 - Calib1) / Calib2)
proc_data
#To calculate dt we need to to convert Time from factor to POSIXct
$Datetime <- as.POSIXct(paste(proc_data$Time,tz = ""),
proc_dataformat = "%d/%m/%Y %H:%M:%S")
#Pick specific Vaisala and closure (file)
#This code picked Vaisala 4 and closure 8
<- subset(proc_data, Vaisala == "04" & Closure == "8")
sub $dt <- as.numeric(sub$Datetime - sub$Datetime[1])
sub<- lm(CO2calib ~ dt, data = sub)
model1 plot(CO2calib ~ dt, data = sub)
abline(model1, col = "blue", lwd = 2)
#Add fitted values to dataframe
$CO2fitted <- fitted(model1)
sub
#Calculate average CO<sub>2</sub>
<- as.numeric(coef(model1)[2] *
flux $ChamberV / sub$CollarA) *
(sub44.01 / 22.414) * (273.15 / (273.15 + sub$Temp))) (
The mean flux for Vaisala 4 and enclosure 8 is
mean(flux)
## [1] 0.1005061
3 Analyse soil CO2 emissions
#Load libraries
library("plyr")
library("dplyr")
library("ggplot2")
library("lme4")
library("extrafont")
<- "Segoe UI"
myFont windowsFonts(mono = myFont)
setwd("D:/Git/project-1/processed-data/")
<- read.table(file = "Database_fluxes_v0.txt",
data header = TRUE,
dec = ".")
#Renaming all variables
names(data)[names(data) == "fire"] <- "TAF"
names(data)[names(data) == "tsoil"] <- "ST"
names(data)[names(data) == "msoil"] <- "SM"
str(data)
## 'data.frame': 216 obs. of 8 variables:
## $ TAF : int 179 179 179 179 179 179 179 179 179 179 ...
## $ plot : chr "a" "a" "b" "b" ...
## $ collar: int 1 2 3 4 5 6 1 2 3 4 ...
## $ month : chr "maio" "maio" "maio" "maio" ...
## $ site : chr "179years" "179years" "179years" "179years" ...
## $ co2 : num 0.101 0.0669 0.1075 0.0893 0.0846 ...
## $ ST : num 9 8.75 11.05 9.4 10.65 ...
## $ SM : num 16.4 18.8 14.4 7.8 15.3 ...
#Convert to factor
$plot<-factor(data$plot)
data$collar<-factor(data$collar)
data$month<-factor(data$month)
data$site<-factor(data$site)
data
#Remove missing data
colSums(is.na(data))
## TAF plot collar month site co2 ST SM
## 0 0 0 0 0 0 11 11
<- na.exclude(data)
data1
#Change the unit to g m-2d-1
$co2 <- data1$co2 * 86.4
data1
#Fit model with random structure based on model design using REML for final results
<- lmer(co2 ~ site * ST + site * SM + (1 + ST | month) +
full 1 | plot/collar), REML = T, data = data1)
(
#Fit model with random structure based on model design using using ML for model selection and model validation
<- lmer(co2 ~ site * ST + site * SM + (1 + ST | month) +
fullML 1 | plot/collar), REML = F, data = data1)
(
#Model selection
::kable(drop1(fullML, test="Chi")) knitr
npar | AIC | LRT | Pr(Chi) | |
---|---|---|---|---|
NA | 1244.697 | NA | NA | |
site:ST | 5 | 1259.229 | 24.53153 | 0.0001716 |
site:SM | 5 | 1252.645 | 17.94806 | 0.0030122 |
#Model validation: residuals vs. fitted
<- resid(fullML)
E1 <- fitted(fullML)
F1 par(mfrow = c(1, 1))
plot(x = F1,
y = E1,
xlab = "Fitted values",
ylab = "Residuals")
abline(h = 0, lty = 2, col = 1)
#Model estimates
summary(full)
## Linear mixed model fit by REML ['lmerMod']
## Formula: co2 ~ site * ST + site * SM + (1 + ST | month) + (1 | plot/collar)
## Data: data1
##
## REML criterion at convergence: 1193.1
##
## Scaled residuals:
## Min 1Q Median 3Q Max
## -3.8354 -0.4295 0.0060 0.3401 4.1420
##
## Random effects:
## Groups Name Variance Std.Dev. Corr
## collar:plot (Intercept) 6.954e+00 2.63708
## plot (Intercept) 5.603e+00 2.36701
## month (Intercept) 9.916e+00 3.14890
## ST 7.267e-04 0.02696 1.00
## Residual 1.499e+01 3.87172
## Number of obs: 205, groups: collar:plot, 36; plot, 18; month, 6
##
## Fixed effects:
## Estimate Std. Error t value
## (Intercept) 1.98012 7.04356 0.281
## site19years -2.39753 7.59132 -0.316
## site34years 0.41092 7.34670 0.056
## site65years 0.51263 7.57206 0.068
## site76years -12.51082 8.18110 -1.529
## site8years 7.55761 7.28666 1.037
## ST 0.93067 0.43804 2.125
## SM 0.12848 0.24694 0.520
## site19years:ST 0.26722 0.42491 0.629
## site34years:ST -0.22885 0.42703 -0.536
## site65years:ST 0.37095 0.43402 0.855
## site76years:ST 0.90376 0.45103 2.004
## site8years:ST -0.88181 0.39600 -2.227
## site19years:SM -0.16800 0.26474 -0.635
## site34years:SM -0.03273 0.27130 -0.121
## site65years:SM -0.20354 0.28559 -0.713
## site76years:SM 0.26339 0.26947 0.977
## site8years:SM -0.42289 0.27445 -1.541
## optimizer (nloptwrap) convergence code: 0 (OK)
## boundary (singular) fit: see ?isSingular
#Visualize what the model is doing for soil temperature
<- ddply(data1,
MyData
.(site),
summarize,ST = seq(min(ST),
max(ST),
length = 30))
"SM"] <- mean(data1$SM)
MyData[
<- model.matrix(~site*ST + site*SM,
X data = MyData)
<- fixef(full)
beta $mu<-X%*%beta
MyData
$SE <- sqrt( diag( X %*% vcov(full) %*% t(X)))
MyData$seup <- MyData$mu + 1.96 * MyData$SE #Lower bound
MyData$selow <- MyData$mu - 1.96 * MyData$SE #Upper bound
MyData::kable(head(MyData)) knitr
site | ST | SM | mu | SE | seup | selow |
---|---|---|---|---|---|---|
179years | 8.750000 | 15.83049 | 12.15737 | 3.109175 | 18.25135 | 6.063387 |
179years | 9.055172 | 15.83049 | 12.44139 | 3.030924 | 18.38200 | 6.500775 |
179years | 9.360345 | 15.83049 | 12.72540 | 2.956648 | 18.52043 | 6.930370 |
179years | 9.665517 | 15.83049 | 13.00942 | 2.886655 | 18.66726 | 7.351572 |
179years | 9.970690 | 15.83049 | 13.29343 | 2.821263 | 18.82311 | 7.763755 |
179years | 10.275862 | 15.83049 | 13.57745 | 2.760799 | 18.98861 | 8.166279 |
<- c("8years" = "#F7BA7A", "19years" = "#C7A66F",
cols1 "34years" = "#979364", "65years" = "#687F5A",
"76years" = "#386C4F", "179years" = "#085844")
#CO2 vs. Soil temperature
<- ggplot()
p1 <- p1 + geom_point(data = data1,
p1 aes(y = co2,
x = ST,
col = site,
shape = site), size = 1)
<- p1 + xlab(expression('Soil temperature ('*~degree*C*')')) +
p1 ylab(bquote(''*g ~CO[2]~ m^-2~d^-1*''))
<- p1 + geom_line(data = MyData,
p1 aes(x = ST,
y = mu))
<- p1 + geom_ribbon(data = MyData,
p1 aes(x = ST,
ymax = seup,
ymin = selow),
alpha = 0.1)
<- p1 + facet_grid(. ~ factor(site,
p1 levels = c("8years", "19years", "34years",
"65years","76years", "179years"),
labels = c("8 years", "19 years", "34 years",
"65 years","76 years", "179 years")))
<- p1 + theme_bw() +
p1 theme(text=element_text(size = 12, family="mono")) +
theme(legend.position = "none")
<- p1 + scale_colour_manual(
p1 values = cols1,
aesthetics = "colour")
<- p1+ scale_shape_manual(name = "Time after fire",
p1 breaks = c("8years", "19years", "34years",
"65years", "76years", "179years"),
values = c(8, 17, 15, 4, 7, 16))
#CO2 vs. Soil moisture
<- ddply(data1,
MyData
.(site),
summarize,SM = seq(min(SM),
max(SM),
length = 30))
"ST"] <- mean(data1$SM)
MyData[
<- model.matrix(~site*ST + site*SM,
X data = MyData)
<- fixef(full)
beta $mu<-X%*%beta
MyData
$SE <- sqrt( diag( X %*% vcov(full) %*% t(X)))
MyData$seup <- MyData$mu + 1.96 * MyData$SE
MyData$selow <- MyData$mu - 1.96 * MyData$SE
MyData::kable(head(MyData)) knitr
site | SM | ST | mu | SE | seup | selow |
---|---|---|---|---|---|---|
179years | 4.950000 | 15.83049 | 17.34905 | 3.205163 | 23.63117 | 11.06693 |
179years | 5.446552 | 15.83049 | 17.41285 | 3.137251 | 23.56186 | 11.26384 |
179years | 5.943103 | 15.83049 | 17.47665 | 3.072732 | 23.49920 | 11.45409 |
179years | 6.439655 | 15.83049 | 17.54044 | 3.011826 | 23.44362 | 11.63726 |
179years | 6.936207 | 15.83049 | 17.60424 | 2.954755 | 23.39556 | 11.81292 |
179years | 7.432759 | 15.83049 | 17.66804 | 2.901745 | 23.35546 | 11.98062 |
<- ggplot()
p11 <- p11 + geom_point(data = data1,
p11 aes(y = co2,
x = SM,
col = site,
shape = site), size = 1)
<- p11 + xlab("Soil moisture (%)") +
p11 ylab(bquote(''*g ~CO[2]~ m^-2~d^-1*''))
<- p11 + geom_line(data = MyData,
p11 aes(x = SM,
y = mu))
<- p11 + geom_ribbon(data = MyData,
p11 aes(x = SM,
ymax = seup,
ymin = selow),
alpha = 0.1)
<- p11 + facet_grid(. ~ factor(site,
p11 levels = c("8years", "19years", "34years",
"65years","76years", "179years"),
labels = c("8 years", "19 years", "34 years",
"65 years","76 years", "179 years")))
<- p11 + theme_bw() +
p11 theme(text=element_text(size = 12, family="mono")) +
theme(legend.position= "none")
<- p11 + scale_colour_manual(
p11 values = cols1,
aesthetics = "colour")
<- p11 + scale_shape_manual(name = "Time after fire",
p11 breaks = c("8years", "19years", "34years",
"65years", "76years", "179years"),
values = c(8, 17, 15, 4, 7, 16))
p1
p11
4 References
Hadley Wickham (2011). The Split-Apply-Combine Strategy for Data Analysis. Journal of Statistical Software, 40(1), 1-29. http://www.jstatsoft.org/v40/i01/.
Timur V. Elzhov, Katharine M. Mullen, Andrej-Nikolai Spiess and Ben Bolker (2016). minpack.lm: R Interface to the Levenberg-Marquardt Nonlinear Least-Squares Algorithm Found in MINPACK, Plus Support for Bounds. R package version 1.2-1. https://CRAN.R-project.org/package=minpack.lm
Hadley Wickham, Romain Francois, Lionel Henry and Kirill Muller (2021). dplyr: A Grammar of Data Manipulation. R package version 1.0.7. https://CRAN.R-project.org/package=dplyr
Kieslich, P. J., & Henninger, F. (2016). Readbulk: An R package for reading and combining multiple data files. https://doi.org/10.5281/zenodo.596649
Steve Su, with contributions from: Diethelm Wuertz, Martin Maechler, Rmetrics core team members for low discrepancy algorithm, Juha Karvanen for L moments codes, Robert King for gld C codes, starship codes, Benjamin Dean for corrections, input in ks.gof code and R core team for histsu function. (2020). GLDEX: Fitting Single and Mixture of Generalised Lambda Distributions (RS and FMKL) using Various Methods. R package version 2.0.0.7. https://CRAN.R-project.org/package=GLDEX