From 05d0f6ce07871b1cf1cecfa4d433b3b33728e2fa Mon Sep 17 00:00:00 2001
From: NPJuncal <npjuncal@gmail.com>
Date: Wed, 19 Apr 2023 14:24:26 +0100
Subject: [PATCH] Functional functions :) transforme_om_oc, Model linear
 relation between organic matter and organic carbon and estimate organic
 carbon values from organic matter data estimate_h, checks for space between
 samples and, if any, divide this space between the previous and next sample
 to return sample thickness withouth gaps in the core

estimate_stocks, Estimates the carbon stock from soil core data until a specific depth, 100 cm by default. If the core do not reach the standardization depth it extrapolate the stock from a linear model between the organic carbon accumulated mass and depth.

test_extrapolations, subset those cores that reach the standardization depth and estimates the stock (observed stock), estimate the stock from the linear relation of organic carbon accumulated mass and depth using the 90, 75, 50 and 25% top length of the indicated standardization depth. Compares the observed stock with the estimated stocks by extrapolation.

estimate_fluxes, estimate the average organic carbon flux to the soil in a indicated time frame (by default last 100 years) from the organic carbon concentration and ages obtained from a age-depth or age-accumulated mass model
---
 R/estimate_flux.R      |  83 ++++++++++++++++++++
 R/estimate_h.R         |  90 +++++++++++++++++++++
 R/estimate_stock.R     |  94 ++++++++++++++++++++++
 R/test_extrapolation.R | 174 +++++++++++++++++++++++++++++++++++++++++
 R/transform_om_oc.R    | 145 ++++++++++++++++++++++++++++++++++
 5 files changed, 586 insertions(+)
 create mode 100644 R/estimate_flux.R
 create mode 100644 R/estimate_h.R
 create mode 100644 R/estimate_stock.R
 create mode 100644 R/test_extrapolation.R
 create mode 100644 R/transform_om_oc.R

diff --git a/R/estimate_flux.R b/R/estimate_flux.R
new file mode 100644
index 0000000..37823c2
--- /dev/null
+++ b/R/estimate_flux.R
@@ -0,0 +1,83 @@
+#' Estimate the average organic carbon flux
+#'
+#' @description estimate the average organic carbon flux to the soil in a indicated time frame (by default last 100 years) from the organic carbon concentration and ages obtained from a age-depth or age-accumulated mass model
+#'
+#' @param df A [data.frame] with, at least, columns: Core.ID, DMin (minimum depth of the sample), DMax (maximum depth of the sample), DBD (dry bulk density), POC (organic carbon %), Age (age of the sample obtained from a age-depth or age-accumulated mass model)
+#' @param TimeFrame standardization time frame, by default 100 years
+#'
+#' @return [data.frame] with columns Core.id, F.WC (organic carbon fluxes at the whole core), A.Max (maximum age of the core), and Flux (average organic carbon flux at the indicated time frame)
+#' @export
+#'
+#' @examples
+
+
+estimate_flux<- function(df=NULL,TimeFrame=100) {
+
+  # class of the dataframe
+  if (is.data.frame(df)==FALSE) {stop("The data provided is not class data.frame, please check data and transforme")}
+  if (is.numeric(TimeFrame)==FALSE) {stop("The TimeFrame provided is not class numeric, please check data and transforme")}
+
+  # name of the columns
+  if ("CoreID" %in% colnames(df)==FALSE) {stop("There is not column named CoreID. Please, check necessary columns in functions documentation")}
+  if ("DMin" %in% colnames(df)==FALSE) {stop("There is not column named Min.D. Please, check necessary columns in functions documentation")}
+  if ("DMax" %in% colnames(df)==FALSE) {stop("There is not column named Max.D. Please, check necessary columns in functions documentation")}
+  if ("DBD" %in% colnames(df)==FALSE) {stop("There is not column named DBD. Please, check necessary columns in functions documentation")}
+  if ("fOC" %in% colnames(df)==FALSE) {stop("There is not column named fOC. Please, check necessary columns in functions documentation")}
+  if ("Age" %in% colnames(df)==FALSE) {stop("There is not column named Age. Please, check necessary columns in functions documentation")}
+
+  # class of the columns
+  if (is.numeric(df$DMin)==FALSE) {stop("Minimum depth data is not class numeric, please check")}
+  if (is.numeric(df$DMax)==FALSE) {stop("Maximum depth data is not class numeric, please check")}
+  if (is.numeric(df$DBD)==FALSE) {stop("Dry Bulk Density data is not class numeric, please check")}
+  if (is.numeric(df$fOC)==FALSE) {stop("Organic carbon data is not class numeric, please check")}
+  if (is.numeric(df$Age)==FALSE) {stop("Age data is not class numeric, please check")}
+
+
+
+  #select those cores with chronological models
+  df<-df[!is.na(df$Age),]
+  df<-df[!is.na(df$fOC),]
+
+  df<-estimate_h (df)
+
+  X<-split(df, df$CoreID)
+
+  BCF <- data.frame(CoreID=character(),
+                    F.WC=numeric(),
+                    A.Max=numeric(),
+                    Flux=numeric())
+
+
+  for(i in 1:length(X)) {
+    BCF[i,1]<-names(X[i])
+    Data<-as.data.frame(X[i])
+    colnames(Data)<-colnames(df)
+
+    if(nrow(Data)<3) next
+
+    else{
+
+      Data <-Data |> dplyr::mutate (OCgcm2 = DBD*(fOC/100)*h)
+
+      #estimation of the average carbon flux for the whole core (OC stock/Max Age)
+      BCF[i,2]<-(sum(Data[,which(colnames(Data)=="OCgcm2")]))/max(Data$Age)
+      BCF[i,3]<-max(Data$Age)
+
+      #estimation of the average carbon flux for the selected TimeFrame (OC stock last 100 yrs/TimeFrame)
+
+      if (max(Data$Age)==TimeFrame) {
+
+        BCF[i,4]<-((sum(Data[c(1:(nrow(Data))),which(colnames(Data)=="OCgcm2")]))/TimeFrame)
+
+      } else {
+        Data<-Data[c(1:(length(which(Data$Age <=TimeFrame))+1)),]
+
+        BCF[i,4]<-(
+          (sum(Data[c(1:(nrow(Data)-1)),which(colnames(Data)=="OCgcm2")])+
+                       (Data[nrow(Data),which(colnames(Data)=="OCgcm2")]/((max(Data$Age)-Data[(nrow(Data)-1),which(colnames(Data)=="Age")])))
+                        *(TimeFrame-Data[(nrow(Data)-1),which(colnames(Data)=="Age")]))/TimeFrame)
+      }
+
+    }}
+  return(BCF)
+}
diff --git a/R/estimate_h.R b/R/estimate_h.R
new file mode 100644
index 0000000..52d4985
--- /dev/null
+++ b/R/estimate_h.R
@@ -0,0 +1,90 @@
+#' Estimation of the thickness of the sample
+#'
+#' @description checks for space between samples and, if any, divide this space between the previous and next sample to return sample thickness withouth gaps in the core
+#'
+#' @param df A [data.frame] with with, at least, columns CoreID, DMin (minimum depth of the sample)and DMax (maximum depth of the sample)
+#'
+#' @return the initial [data.frame] with three additional columns: EMin (estimated minimum depth of the sample), EMax (estimated maximum depth of the sample) and h (estimated thikness of the sample)
+#' @export
+#'
+#' @examples
+estimate_h <- function(df = NULL) {
+
+  # class of the dataframe
+  if (is.data.frame(df)==FALSE) {stop("The data provided is not class data.frame, please chaeck data and transforme")}
+
+  # name of the columns
+  if ("CoreID" %in% colnames(df)==FALSE) {stop("There is not column named CoreID. Please, check necessary columns in functions documentation")}
+  if ("DMin" %in% colnames(df)==FALSE) {stop("There is not column named DMin. Please, check necessary columns in functions documentation")}
+  if ("DMax" %in% colnames(df)==FALSE) {stop("There is not column named DMax. Please, check necessary columns in functions documentation")}
+
+  # class of the columns
+  if (is.numeric(df$DMin)==FALSE) {stop("Minimum depth data is not class numeric, please check")}
+  if (is.numeric(df$DMax)==FALSE) {stop("Maximum depth data is not class numeric, please check")}
+
+  #check for NAs in depth columns
+  if (sum(is.na(df$DMin))>0){stop("Samples minimun depth column has NAs, please check")}
+  if (sum(is.na(df$DMax))>0){stop("Samples maximun depth column has NAs, please check")}
+
+
+  # create individual data frames per each core
+
+  df$CoreID <- factor(df$CoreID, levels=unique(df$CoreID))
+  X<-split(df, df$CoreID)
+
+
+  columns<-c("EMin","EMax","h")
+  Fdf2 = data.frame(matrix(nrow = 0, ncol = length(columns)))
+  colnames(Fdf2) = columns
+
+
+  for(i in 1:length(X)) {
+
+    Data<-as.data.frame(X[i])
+    colnames(Data)<-colnames(df)
+
+    #check if there is spaces between samples (e.g, first sample ends at 5 cm and next starts at 7)
+    space<- c()
+
+    for (j in 1:(nrow(Data)-1)) {
+
+      # if there are no spaces between samples min and maximun depth of samples remain the same
+      if (Data[j,which(colnames(Data)=="DMax")] == Data[j+1,which(colnames(Data)=="DMin")]) {
+        space[j]<-FALSE} else {space[j]<-TRUE}}
+
+    if (any(space==TRUE)) {
+      # if there are spaces between samples it estimate the medium point between the maximum depth of the sample and the minimun depth of the next sample
+      # and divide that distance between both samples
+      Data <- cbind(Data, EMin=NA, EMax=NA)
+      Data[1,"EMin"]<-0
+      Data[nrow(Data),"EMax"]<-Data[nrow(Data),"DMax"]
+      for (j in 1:(nrow(Data)-1)) {
+        if(space[j]==TRUE) {
+          Data[j,"EMax"]<-Data[j,"DMax"]+((Data[j+1,"DMin"]-Data[j,"DMax"])/2)
+          Data[j+1,"EMin"]<-Data[j,"DMax"]+((Data[j+1,"DMin"]-Data[j,"DMax"])/2)} else {
+            Data[j,"EMax"]<-Data[j,"DMax"]
+            Data[j+1,"EMin"]<-Data[j+1,"DMin"]}}
+
+    }  else{
+      Data <- cbind(Data, EMin=NA, EMax=NA)
+      Data$EMin<-Data$DMin
+      Data$EMax<-Data$DMax
+
+    }
+
+    Data <- cbind(Data, h=NA)
+
+    #estimation of the thickness of the sample (h) from the new minimun and max depth of the sample
+
+    Data<- Data |> dplyr::mutate (h = EMax-EMin)
+
+    temp<-cbind(Data$EMin, Data$EMax, Data$h)
+    colnames(temp)<-colnames(Fdf2)
+    Fdf2<-rbind(Fdf2, temp)
+
+  }
+  Fdf<-cbind(df, Fdf2)
+
+  return(Fdf)
+}
+
diff --git a/R/estimate_stock.R b/R/estimate_stock.R
new file mode 100644
index 0000000..e4c149a
--- /dev/null
+++ b/R/estimate_stock.R
@@ -0,0 +1,94 @@
+#' Organic Carbon Stock estimation
+#'
+#' @description Estimates the carbon stock from soil core data until a specific depth, 100 cm by default. If the core do not reach the
+#' standardization depth it extrapolate the stock from a linear model between the organic carbon accumulated mass and depth.
+#'
+#' @param df A [data.frame] with, at least, columns CoreID, DMin (minimum depth of the sample), DMax (maximum depth of the sample), DBD (dry bulk density), fOC (organic carbon %)
+#' @param Depth standardization soil depth, by default 100 cm.
+#'
+#' @return [data.frame] with columns CoreID, S.WC (organic carbon stock at the whole core), D.Max (maximum depth of the core), and Stock (organic carbon stock at the standardized depth)
+#' @export
+#'
+#' @examples
+
+estimate_stock <- function(df = NULL, Depth = 100) {
+
+  # class of the dataframe
+  if (is.data.frame(df)==FALSE) {stop("The data provided is not class data.frame, please chaeck data and transforme")}
+  if (is.numeric(Depth)==FALSE) {stop("The Depth provided is not class numeric, please chaeck data and transforme")}
+
+  # name of the columns
+  if ("CoreID" %in% colnames(df)==FALSE) {stop("There is not column named CoreID. Please, check necessary columns in functions documentation")}
+  if ("DMin" %in% colnames(df)==FALSE) {stop("There is not column named Min.D. Please, check necessary columns in functions documentation")}
+  if ("DMax" %in% colnames(df)==FALSE) {stop("There is not column named Max.D. Please, check necessary columns in functions documentation")}
+  if ("DBD" %in% colnames(df)==FALSE) {stop("There is not column named DBD. Please, check necessary columns in functions documentation")}
+  if ("fOC" %in% colnames(df)==FALSE) {stop("There is not column named fOC. Please, check necessary columns in functions documentation")}
+
+  # class of the columns
+  if (is.numeric(df$DMin)==FALSE) {stop("Minimum depth data is not class numeric, please check")}
+  if (is.numeric(df$DMax)==FALSE) {stop("Maximum depth data is not class numeric, please check")}
+  if (is.numeric(df$DBD)==FALSE) {stop("Dry Bulk Density data is not class numeric, please check")}
+  if (is.numeric(df$fOC)==FALSE) {stop("Organic carbon data is not class numeric, please check")}
+
+
+
+  df<-df[!is.na(df$fOC),]
+
+  # estimate thickness of the sample
+
+  dfh<-estimate_h(df)
+
+  # estimate stocks
+
+  X<-split(dfh, dfh$CoreID)
+
+  BCS <- data.frame(CoreID=character(),
+                    S.WC=numeric(),
+                    D.Max=numeric(),
+                    Stock=numeric())
+
+  for(i in 1:length(X)) {
+    BCS[i,1]<-names(X[i])
+    Data<-as.data.frame(X[i])
+    colnames(Data)<-colnames(dfh)
+
+    if(nrow(Data)<3) next
+
+    else{
+
+      #estimation of carbon g cm2 per sample, OCgcm2= carbon density (g cm3) by thickness (h)
+      Data <-Data |> dplyr::mutate (OCgcm2 = DBD*(fOC/100)*h)
+
+      #estimation of the OC stock in the whole core
+      BCS[i,2]<-sum(Data[,which(colnames(Data)=="OCgcm2")])
+      BCS[i,3]<-max(Data[,which(colnames(Data)=="EMax")])
+
+      #if core exactly the standarization depth, we keep the stock of the whole core
+      if(max(Data$EMax)==Depth) {BCS[i,4]<-sum(Data[,which(colnames(Data)=="OCgcm2")])}
+
+
+      else{
+
+        # if the core longer than the standardization depth we estimate the stock until that depth
+        if (max(Data$EMax)>=Depth)
+
+        {
+          Data<-Data[c(1:(length(which(Data$EMax <=Depth))+1)),]
+
+          if(nrow(Data)<3) next
+
+          else{
+
+          BCS[i,4]<-(((sum(Data[c(1:(nrow(Data)-1)),which(colnames(Data)=="OCgcm2")]))+
+                        (Data[nrow(Data),which(colnames(Data)=="OCgcm2")]/((max(Data$EMax)-Data[(nrow(Data)-1),which(colnames(Data)=="EMax")]))
+                         *(Depth-Data[(nrow(Data)-1),which(colnames(Data)=="EMax")]))))}}
+
+        #if core shorter than than the standardization depth we model the OC acumulated mass with depth and predict the stock at that depth
+        else {
+
+          Data <-Data |> dplyr::mutate (OCM = cumsum(OCgcm2))
+          model<-lm(OCM ~ EMax, data=Data)
+          BCS[i,4]<-coef(model)[1] + Depth*coef(model)[2]}}
+    }}
+  return(BCS)
+}
diff --git a/R/test_extrapolation.R b/R/test_extrapolation.R
new file mode 100644
index 0000000..d66e375
--- /dev/null
+++ b/R/test_extrapolation.R
@@ -0,0 +1,174 @@
+#' Test differences between observed and extrapolated stocks
+#'
+#' @description subset those cores that reach the standardization depth and estimates the stock (observed stock), estimate the stock from the linear relation of organic carbon accumulated mass and depth using the 90, 75, 50 and 25% top length of the indicated standardization depth. Compares the observed stock with the estimated stocks by extrapolation.
+#'
+#' @param df A [data.frame] with, at least, columns: CoreID, DMin (minimum depth of the sample), DMax (maximum depth of the sample), DBD (dry bulk density), POC (organic carbon %)
+#' @param Depth standardization soil depth, by default 100 cm.
+#'
+#' @return A [data.frame] with the observed and extrapolated stocks. A plot with comparisons.
+#' @export
+#'
+#' @examples
+
+test_extrapolation <- function(df = NULL, Depth = 100) {
+
+  # class of the dataframe
+  if (is.data.frame(df)==FALSE) {stop("The data provided is not class data.frame, please chaeck data and transforme")}
+  if (is.numeric(Depth)==FALSE) {stop("The Depth provided is not class numeric, please chaeck data and transforme")}
+
+  # name of the columns
+  if ("CoreID" %in% colnames(df)==FALSE) {stop("There is not column named CoreID. Please, check necessary columns in functions documentation")}
+  if ("DMin" %in% colnames(df)==FALSE) {stop("There is not column named Min.D. Please, check necessary columns in functions documentation")}
+  if ("DMax" %in% colnames(df)==FALSE) {stop("There is not column named Max.D. Please, check necessary columns in functions documentation")}
+  if ("DBD" %in% colnames(df)==FALSE) {stop("There is not column named DBD. Please, check necessary columns in functions documentation")}
+  if ("fOC" %in% colnames(df)==FALSE) {stop("There is not column named fOC. Please, check necessary columns in functions documentation")}
+
+  # class of the columns
+  if (is.numeric(df$DMin)==FALSE) {stop("Minimum depth data is not class numeric, please check")}
+  if (is.numeric(df$DMax)==FALSE) {stop("Maximum depth data is not class numeric, please check")}
+  if (is.numeric(df$DBD)==FALSE) {stop("Dry Bulk Density data is not class numeric, please check")}
+  if (is.numeric(df$fOC)==FALSE) {stop("Organic carbon data is not class numeric, please check")}
+
+
+  # we select those cores larger than the standard depth
+
+  columns<-colnames(df)
+  DataE = data.frame(matrix(nrow = 0, ncol = length(columns)))
+  colnames(DataE) = columns
+
+
+
+   X <- split(df, df$CoreID)
+
+  for (i in 1:length(X)) {
+    Data <- as.data.frame(X[i])
+    colnames(Data) <- colnames(df)
+
+    if (max(Data$DMax) < Depth) next
+
+    DataE<-rbind(DataE,Data)}
+
+
+   # estimate the stock at the standar depth
+
+  ExtS <- data.frame(
+    CoreID = character(),
+    EXT.100 = numeric(),
+    EXT.90 = numeric(),
+    EXT.75 = numeric(),
+    EXT.50 = numeric(),
+    EXT.25 = numeric()
+  )
+
+
+    hundreth<- Depth #100% of the extrapolation length
+    ninety <- Depth * 0.9 #90% of the extrapolation length
+    seventy <- Depth * 0.75 #90% of the extrapolation length
+    fifhty <- Depth * 0.50 #90% of the extrapolation length
+    twentififty <- Depth * 0.25 #90% of the extrapolation length
+
+
+    #estimate observed stock
+    temp100<-estimate_stock (DataE, Depth=hundreth)
+    ExtS[c(1:nrow(temp100)),"CoreID"]<-temp100$CoreID
+    ExtS$EXT.100<-temp100$Stock
+
+   # estimate stock with a 90, 75, 50 and 25 percentage of the standard depth
+
+    X <- split(DataE, DataE$CoreID)
+
+    for (i in 1:length(X)) {
+      Data <- as.data.frame(X[i])
+      colnames(Data) <- colnames(DataE)
+
+    Data<-Data[!is.na(Data$fOC),]
+    Data<-estimate_h(Data)
+
+    #estimation of carbon g cm2 per sample, OCgcm2= carbon density (g cm3) by thickness (h)
+    Data <-Data |> dplyr::mutate (OCgcm2 = DBD*(fOC/100)*h)
+
+    #estimate organic carbon accumulated mass
+    Data <-Data |> dplyr::mutate (OCM = cumsum(OCgcm2))
+
+    Data<- subset(Data,Data$DMax<=ninety)
+
+    if (nrow(Data)>3){
+      model90<-lm(OCM ~ DMax, data=Data)
+      ExtS[i,3]<-coef(model90)[1] + 100*coef(model90)[2]}
+
+    Data<- subset(Data,Data$DMax<=seventy)
+    if (nrow(Data)>3){
+      model75<-lm(OCM ~ DMax, data=Data)
+      ExtS[i,4]<-coef(model75)[1] + 100*coef(model75)[2]}
+
+    Data<- subset(Data,Data$DMax<=fifhty)
+    if (nrow(Data)>3){
+      model50<-lm(OCM ~ DMax, data=Data)
+      ExtS[i,5]<-coef(model50)[1] + 100*coef(model50)[2]}
+
+    Data<- subset(Data,Data$DMax<=twentififty)
+    if (nrow(Data)>3){
+      model25<-lm(OCM ~ DMax, data=Data)
+      ExtS[i,6]<-coef(model25)[1] + 100*coef(model25)[2]}}
+
+
+  #############
+  # we test the correlation between the stock at 1m estimated from real data and the models
+  # and the error of the models
+  ##############
+
+
+  CorMat <- cor(na.omit(ExtS[, c(2:6)]), method = "pearson")
+
+
+  corrplot::corrplot(
+    CorMat,
+    type = "upper",
+    order = "hclust",
+    tl.col = "black",
+    tl.srt = 45
+  )
+
+  #write.csv(CorMat,file.path(Folder,"Corr.Extrapolation.csv"),sep=";", dec=",")
+
+
+  ExtS <- ExtS |> dplyr::mutate (Error.90 = (abs(EXT.100 - EXT.90) * 100) / EXT.100)
+  ExtS <- ExtS |> dplyr::mutate (Error.75 = (abs(EXT.100 - EXT.75) * 100) / EXT.100)
+  ExtS <- ExtS |> dplyr::mutate (Error.50 = (abs(EXT.100 - EXT.50) * 100) / EXT.100)
+  ExtS <- ExtS |> dplyr::mutate (Error.25 = (abs(EXT.100 - EXT.25) * 100) / EXT.100)
+
+  summary(ExtS)
+
+
+  #Global Error
+  m.ExtS <- ExtS[, c(1, 7:10)]
+  m.ExtS <- reshape::melt(m.ExtS, id = c("CoreID"))
+
+  library("ggplot2")
+
+  P1<-ggplot2::ggplot(m.ExtS, aes(variable, value)) + ylab("% of deviation from observed value") + xlab("% of standar depth") +
+    geom_boxplot() +
+    geom_jitter() +
+    scale_x_discrete(labels=c("Error.90" = "90%", "Error.75" = "75%", "Error.50" = "50%", "Error.25" = "25%"))+
+    theme(
+      #axis.title.x = element_blank(),
+      #axis.text.x = element_blank(),
+      axis.ticks.x = element_blank()
+    )
+
+  P2<-ggplot2::ggplot(ExtS, aes(ExtS[, 2], ExtS[, 3])) + xlab("Observed Stock") + ylab("Extrapolated stock") +
+    geom_point(aes(ExtS[, 2], ExtS[, 3], color = "90%"), size = 2) +
+    geom_point(aes(ExtS[, 2], ExtS[, 4], color = "75%"), size = 2) +
+    geom_point(aes(ExtS[, 2], ExtS[, 5], color = "50%"), size = 2) +
+    geom_point(aes(ExtS[, 2], ExtS[, 6], color = "25%"), size = 2) +
+    theme(text = element_text(size = 15)) +
+    #geom_text_repel(aes(label=A[,1]), size=4)+
+    xlim(0, 5) + ylim(0, 5) +
+    geom_abline()
+
+Extrapolation_plot<-gridExtra::grid.arrange(P1,P2, ncol=2)
+
+return(ExtS)
+return(Extrapolation_plot)
+
+}
diff --git a/R/transform_om_oc.R b/R/transform_om_oc.R
new file mode 100644
index 0000000..54924ea
--- /dev/null
+++ b/R/transform_om_oc.R
@@ -0,0 +1,145 @@
+#' Organic carbon estimation
+#'
+#' @description Model linear relation between organic matter and organic carbon and estimate organic carbon values from organic matter data
+#'
+#' @param df A [data.frame] with, at least, columns CoreID, Ecosystem, Specie, SiteID, OM, and OC.
+#'
+#' @return the initial [data.frame] + one column with organic carbon values (fOC = final organic carbon)
+#' @export
+#'
+#' @examples
+
+transform_om_oc <- function(df = NULL) {
+
+  #### Estimate df linear model to predict OC from OM for each ecosystem, specie and station ###
+  #skip those models with R2<0.5 or P value>0.05
+
+  # check if the class of the parameters objects, the names and class of the columns of df are correct
+
+  # class of the dataframe
+  if (is.data.frame(df)==FALSE) {stop("The data provided is not class data.frame, please check data and transforme")}
+
+  # name of the columns
+  if ("SiteID" %in% colnames(df)==FALSE) {stop("There is not column named SiteID. Please, check necessary columns in functions documentation")}
+  if ("CoreID" %in% colnames(df)==FALSE) {stop("There is not column named CoreID. Please, check necessary columns in functions documentation")}
+  if ("Ecosystem" %in% colnames(df)==FALSE) {stop("There is not column named Ecosystem. Please, check necessary columns in functions documentation")}
+  if ("Specie" %in% colnames(df)==FALSE) {stop("There is not column named Specie. Please, check necessary columns in functions documentation")}
+  if ("OM" %in% colnames(df)==FALSE) {stop("There is not column named OM. Please, check necessary columns in functions documentation")}
+  if ("OC" %in% colnames(df)==FALSE) {stop("There is not column named OC. Please, check necessary columns in functions documentation")}
+
+  # class of the columns
+  if (is.numeric(df$OM)==FALSE) {stop("Organic matter data is not class numeric, please check")}
+  if (is.numeric(df$OC)==FALSE) {stop("Organic carbon data is not class numeric, please check")}
+
+
+  #create df list of dataframes with data from each ecosystem, specie, and station (site)
+  X<-split(df, df$Ecosystem)
+  X2<-split(df, df$Specie)
+  X3<-split(df, df$SiteID)
+  X<-c(X,X2,X3)
+  length(X)
+
+  #create empty table to log model data
+  OCEst <- data.frame(ID=character(),
+                      R2=numeric(),
+                      P=numeric(),
+                      f=numeric(),
+                      int=numeric(),
+                      slope=numeric()
+  )
+
+  for(i in 1:length(X)) {
+    OCEst[i,1]<-names(X[i])
+    Data<-as.data.frame(X[i])
+    colnames(Data)<-colnames(df)
+
+
+    #we only model those ecosystem, specie, and station with more than 5 samples were OC and LOI were mwasured
+    if((nrow(Data |> dplyr::filter_at(dplyr::vars(OM,OC),dplyr::all_vars(!is.na(.)))))<5) next
+
+
+    else{
+
+      model<-lm(OC ~ OM, data=Data)
+
+      if(summary(model)$r.squared<0.5 | broom::glance(model)$p.value>0.05 ) next
+
+      else{
+
+        OCEst[i,2]<-summary(model)$r.squared
+        OCEst[i,3]<-broom::glance(model)$p.value
+        OCEst[i,4]<-summary(model)$fstatistic[1]
+        OCEst[i,5]<-model$coefficients[1]
+        OCEst[i,6]<-model$coefficients[2]
+
+      }}
+  }
+
+  rownames(OCEst)<-OCEst$ID
+
+  # write.csv(OCEst,file.path(Folder,"OM-OC_lm.csv"),sep=";", dec=",")
+
+  ## Use the models estimated to predict OC from OM content for those samples with no OC data
+  #If there is OC data for that sample, keep original OC data,
+  #else use function estimated for that SiteID
+  #else function for specie
+  #else function for ecosystem
+
+  df$fOC <- NA
+
+  for(i in 1:nrow(df)) {
+
+    if (is.na(df[i,which( colnames(df)=="OC" )])==FALSE)
+    {df[i,which( colnames(df)=="fOC" )]<-df[i,which( colnames(df)=="OC" )]}
+
+    else { if (is.na(OCEst[which(rownames(OCEst)==(df[i,which( colnames(df)=="SiteID" )])),which(colnames(OCEst)=="int")])==FALSE)
+
+    {df[i,which( colnames(df)=="fOC" )]<-
+      OCEst[which(rownames(OCEst)==(df[i,which( colnames(df)=="SiteID" )])),which(colnames(OCEst)=="int" )]+
+      (OCEst[which(rownames(OCEst)==(df[i,which( colnames(df)=="SiteID" )])),which(colnames(OCEst)=="slope" )])*
+      df[i,which( colnames(df)=="OM" )] }
+
+      else{ if (is.na(OCEst[which(rownames(OCEst)==(df[i,which( colnames(df)=="Specie" )])),which(colnames(OCEst)=="int")])==FALSE)
+
+      {df[i,which( colnames(df)=="fOC" )]<-
+        OCEst[which(rownames(OCEst)==(df[i,which( colnames(df)=="Specie" )])),which(colnames(OCEst)=="int" )]+
+        (OCEst[which(rownames(OCEst)==(df[i,which( colnames(df)=="Specie" )])),which(colnames(OCEst)=="slope" )])*
+        df[i,which( colnames(df)=="OM" )]}
+
+        else {df[i,which( colnames(df)=="fOC" )]<-
+          OCEst[which(rownames(OCEst)==(df[i,which( colnames(df)=="Ecosystem" )])),which(colnames(OCEst)=="int" )]+
+          (OCEst[which(rownames(OCEst)==(df[i,which( colnames(df)=="Ecosystem" )])),which(colnames(OCEst)=="slope" )])*
+          df[i,which( colnames(df)=="OM" )]}}}
+
+  }
+
+  ## when OM very low, the estimation can give negative values of OC. We change negative values for 0.
+
+  df$fOC[df$fOC < 0] <- 0
+
+
+  if (sum(is.na(df$fOC) & !is.na(df$OM))>=1) {
+  message(
+    paste("Howard et al (2014) applied to",
+          sum(is.na(df$fOC) & !is.na(df$OM) ), "observations")
+  )}
+
+  df <- df |>
+    dplyr::mutate(
+      fOC = dplyr::case_when(
+        is.na(fOC) & OM <= 0.2 & Ecosystem == "Seagrass" ~
+          0.4 * OM - 0.21,
+        is.na(fOC) & OM > 0.2 & Ecosystem == "Seagrass" ~
+          0.43 * OM - 0.33,
+        is.na(fOC) & Ecosystem == "Salt Marsh" ~
+          0.47 * OM + 0.0008 * OM^2,
+        is.na(fOC) & Ecosystem == "Mangrove" ~
+          0.415 * OM - 2.89,
+        T ~ fOC
+      )
+    )
+
+  return(df)
+  #return(OCEst)
+
+}