install.packages("hydrographr")
install.packages("data.table")
install.packages("dplyr")
install.packages("terra")
install.packages("tools")
install.packages("stringr")
install.packages("ranger")
install.packages("leaflet")
install.packages("leafem")
install.packages("sf")


library((sf))
library(hydrographr)
library(data.table)
library(dplyr)
library(terra)
library(tools)
library(stringr)
library(ranger)
library(leaflet)
library(leafem)



#---working directory---
# for Linux / server
wdir<-"/mnt/shared/eakindele"

setwd(wdir)

dir.create("data")


#----Species data-----
spdata<-read.csv("Guineo_Congolian_coordinatecleaned_taxized.csv", header = TRUE )


#---Get bounding box (min long., min. lat, max. long., max. lat.)
bbox <- c(-14.450, -10.6185, 30.907, 13.0300)

m <- leaflet() %>%
  addProviderTiles('Esri.WorldShadedRelief') %>%
  setMaxBounds(bbox[1], bbox[2], bbox[3], bbox[4]) %>%
  addCircles(data = spdata, color = "purple", lat = spdata$decimalLatitude,
             lng = spdata$decimalLongitude) # data = spdata


m

------------------------------------------------------------------------------------------
#-----Get tile_ids-----------
tile_id <- get_tile_id(data = spdata, lon = "decimalLongitude", lat = "decimalLatitude")
tile_id
tile_id <- tile_id[c(2,5,6,8,9)]

# Variables in raster format
vars_tif <- "sub_catchment"
# Variables in vector format. This layerholds the information on stream length
vars_gpkg <- "order_vect_segment" # order_vect_point

# Download the .tif tiles of the desired variables
download_tiles(variable = vars_tif, tile_id = tile_id, file_format = "tif",
               download_dir = "data")

# Download the .gpkg tiles of the desired variables
download_tiles(variable = vars_gpkg, tile_id = tile_id, file_format = "gpkg",
               download_dir = "data")



# Define output directory for merged files------
layer_dir <- paste0(wdir, "/data/final_layers")


# Create the directory
dir.create(layer_dir)


dirs_h90m <- list.dirs(paste0(wdir, "/data"),
                       recursive = TRUE, full.names = TRUE)

dirs_h90m <- dirs_h90m[grep("tiles20d", dirs_h90m)]

for(idir in dirs_h90m) {
  # only choose rasters
  tiles <- list.files(idir, pattern = ".tif$", full.names = TRUE)
  for(itile in tiles) {
    crop_to_extent(
      raster_layer = itile,
      bounding_box = bbox,
      out_dir = idir,
      file_name = paste0(str_remove(basename(itile), ".tif"), "_crop.tif"),
      read = FALSE,
      quiet = TRUE)
  }
}



#Merging

# This can be done sequentially:
idir <- dirs_h90m[1]
merge_tiles(tile_dir = idir,
            tile_names = list.files(idir, full.names = FALSE,
                                    pattern = "_crop.tif"),
            out_dir = layer_dir,
            file_name = "spi.tif",
            read = FALSE)

idir <- dirs_h90m[3]
merge_tiles(tile_dir = idir,
            tile_names = list.files(idir, full.names = FALSE,
                                    pattern = "_crop.tif"),
            out_dir = layer_dir,
            file_name = "slope_curv_max_dw_cel.tif",
            read = FALSE)

idir <- dirs_h90m[4]
merge_tiles(tile_dir = idir,
            tile_names = list.files(idir, full.names = FALSE,
                                    pattern = "_crop.tif"),
            out_dir = layer_dir,
            file_name = "sub_catchment.tif",
            read = FALSE)

-----------------------------------------------------------------------------------
#---Extraction of sub-catcment IDs-----
spdata_ids <- extract_ids(data = spdata, lon = "decimallongitude", lat = "decimallatitude",
                          id = "occurrence_id", quiet = FALSE,
                          subc_layer = paste0(layer_dir, "/sub_catchment.tif"))

spdata_ids <- extract_ids(data = spdata, lon = "decimallongitude", lat = "decimallatitude",
                          id = "occurrence_id", quiet = FALSE,
                          subc_layer = paste0(wdir, "/sub_catchment.tif"))

spdata_ids
summary(spdata_ids)

# Attach the species name and year again
# First remove the lon/lat before joining --> avoids duplicate lon/lat columns
spdata_ids <- spdata_ids[subset(spdata, select=-c(decimallongitude, decimallatitude)), on = "occurrence_id"]

write.csv(spdata_ids, row.names = FALSE, "/mnt/shared/eakindele/presence_subcatchments_coordinates.csv")

----------------------------------------------------------------------------------
  #----Extraction of gpkg variables-----

#Recall vector files
order_vect_tiles <- paste0(wdir, "/data/r.stream.order/order_vect_tiles20d")

#---reading vector files----(order_vect_point changed to order_vect_segment)

stats_gpkg_h16v06 <- read_geopackage(
  "data/r.stream.order/order_vect_tiles20d/order_vect_segment_h16v06.gpkg",
  import_as = "data.table") %>%
  rename('subc_id' = 'stream')

stats_gpkg_h18v06 <- read_geopackage(
  "data/r.stream.order/order_vect_tiles20d/order_vect_segment_h18v06.gpkg",
  import_as = "data.table") %>%
  rename('subc_id' = 'stream')

stats_gpkg_h20v06 <- read_geopackage(
  "data/r.stream.order/order_vect_tiles20d/order_vect_segment_h20v06.gpkg",
  import_as = "data.table") %>%
  rename('subc_id' = 'stream')

stats_gpkg_h18v08 <- read_geopackage(
  "data/r.stream.order/order_vect_tiles20d/order_vect_segment_h18v08.gpkg",
  import_as = "data.table")%>%
  rename('subc_id' = 'stream')

stats_gpkg_h20v08 <- read_geopackage(
  "data/r.stream.order/order_vect_tiles20d/order_vect_segment_h20v08.gpkg",
  import_as = "data.table")%>%
  rename('subc_id' = 'stream')

stats_gpkg_h16v08 <- read_geopackage(
  "data/r.stream.order/order_vect_tiles20d/order_vect_segment_h16v08.gpkg",
  import_as = "data.table")%>%
  rename('subc_id' = 'stream')



#---select columns of ineterest-----(for spatial inventory data)
stats_gpkg_h16v06 <- stats_gpkg_h16v06 %>%
  dplyr::select(c("subc_id", "length", "strahler", "cum_length", "flow_accum", "out_dist", "source_elev", "outlet_elev"))

stats_gpkg_h18v06 <- stats_gpkg_h18v06 %>%
  dplyr::select(c("subc_id", "length", "strahler", "cum_length", "flow_accum", "out_dist", "source_elev", "outlet_elev"))

stats_gpkg_h20v06 <- stats_gpkg_h20v06 %>%
  dplyr::select(c("subc_id", "length", "strahler", "cum_length", "flow_accum", "out_dist", "source_elev", "outlet_elev"))

stats_gpkg_h18v08 <- stats_gpkg_h18v08 %>%
  dplyr::select(c("subc_id", "length", "strahler", "cum_length", "flow_accum", "out_dist", "source_elev", "outlet_elev"))

stats_gpkg_h20v08 <- stats_gpkg_h20v08 %>%
  dplyr::select(c("subc_id", "length", "strahler", "cum_length", "flow_accum", "out_dist", "source_elev", "outlet_elev"))

stats_gpkg_h16v08 <- stats_gpkg_h16v08 %>%
  dplyr::select(c("subc_id", "length", "strahler", "cum_length", "flow_accum", "out_dist", "source_elev", "outlet_elev"))



dt_1<-stats_gpkg_h16v06 
dt_2<-stats_gpkg_h18v06 
dt_3<-stats_gpkg_h20v06 
dt_4<-stats_gpkg_h18v08 
dt_5<-stats_gpkg_h20v08 
dt_6<-stats_gpkg_h16v08 


rm(stats_gpkg_h16v06,stats_gpkg_h18v06, stats_gpkg_h20v06, stats_gpkg_h18v08, stats_gpkg_h20v08, stats_gpkg_h16v08,
   dt_1, dt_2, dt_3, dt_4, dt_5, dt_6)

gc()



#----Then we join the six.gpkg databases 

stats.gpkg <- rbind(dt_1, dt_2, dt_3, dt_4, dt_5, dt_6) 
 

#----Save stats_gpkg
#saveRDS(stats_gpkg, file= paste0(wdir, "stats_gpkg.rds"))
saveRDS(stats_gpkg_spatialInventory, file= paste0(wdir, "/stats_gpkg"))



#------To load it back into R
stats_gpkg<-readRDS(paste0(wdir, "stats_gpkg.rds"))
-----------------------------------------------------------------------------------
  #----subset gpkg variables (Strahler orders, elevations) for subc_id------
gpkg_subset <- stats_gpkg %>%
  filter(subc_id %in% subt)

summary(gpkg_subset)

#----attach gpkg data-elevations, stream orders. etc-----
spdata = merge(spdata_ids, gpkg_subset, by="subc_id")

write.csv(spdata, row.names = FALSE, "/mnt/shared/eakindele/GuineoCongolian_spdata.csv")

