Add project selection and tweaks

steffilazerte · Jan 24, 2024 · 1187a56 · 1187a56
1 parent 724b4f4
commit 1187a56
Show file tree

Hide file tree

Showing 17 changed files with 6,348 additions and 485 deletions.
diff --git a/.gitignore b/.gitignore
@@ -11,4 +11,5 @@
 Data/
 *_files
 */02_range_maps_files/figure-html/*
-*cache/
+*cache/
+*.motus
diff --git a/00_workflow.qmd b/00_workflow.qmd
@@ -1,5 +1,5 @@
 ---
-title: Workflow
+title: Plans
 ---
 
 ## Order of operations

diff --git a/01_select_projects.qmd b/01_select_projects.qmd
@@ -0,0 +1,108 @@
+---
+title: Selecting Projects
+---
+
+Here we explore a list of open or semi-open Motus projects to select more
+project IDs which we can use in our pilot study.
+
+## Setup
+
+```{r}
+#| message: false
+source("XX_setup.R")
+library(readxl)
+```
+
+
+## Cleaning and Filtering
+
+**Cleaning**
+
+The species names (English and scientific) listed here aren't always consistent,
+so we'll omit them and use only the species IDs to match them with the 
+NatureCounts metadata from [`XX_setup`](XX_setup.html).
+
+Then we consolidate the deployments in those projects, as some projects listed
+some deployments under one species name and others under another (even if the
+species was the same).
+
+**Filtering**
+
+- keep only `access == 1` which are fully public projects 
+- omit species ID 129470 which are listed as attached to a Human...(?)
+- omit projects with non-Canadian species
+
+```{r}
+p_sp <- read_excel("Data/Raw/TagsSpeciesProject.xlsx") |>
+  rename_with(.cols = contains("No column"), \(x) "access") |>
+  filter(access == 1,             # Only completely open projects
+         speciesID != 129470) |>  # Don't worry about Human tags :D
+  select(-speciesName, -motusEnglishName, -access) |> # Species Names are not consistent
+  summarize(across(everything(), sum), .by = c("tagProjectID", "speciesID")) |>
+  left_join(select(species, species_id, scientific_name, english_name),
+            by = c("speciesID" = "species_id")) |>
+  mutate(good = !is.na(scientific_name),
+         other = str_detect(english_name, "Eurasian|European|Elaenia")) |>
+  group_by(tagProjectID) |>
+  filter(all(!other | is.na(other))) |> # Omit projects with non-Canadian species
+  mutate(good_tags = sum(num_deployments[good]),
+         prop_good_tags =  good_tags / sum(num_deployments)) |>
+  ungroup() |>
+  select(-other) |>
+  arrange(desc(prop_good_tags))
+
+gt(p_sp) |>
+  fmt_number("prop_good_tags", decimals = 2)
+```
+
+## Summarize
+
+Now we can summarize these projects by how many species, deployments (tags) and
+the average number of deployments per species. 
+
+We'll aim to include projects with a breadth of species but also reasonable coverage,
+so we exclude projects with less than 100% passerines or with three or fewer species.
+```{r}
+p <- p_sp |>
+  filter(prop_good_tags == 1) |>
+  group_by(tagProjectID) |>
+  summarize(total_tags = sum(num_deployments),
+            n_species = n_distinct(speciesID),
+            mean_tags_per_species = mean(num_deployments),
+            species = list(unique(english_name))) |>
+  filter(n_species > 3) |>
+  arrange(desc(mean_tags_per_species), desc(n_species), desc(total_tags))
+
+gt(p) |>
+  fmt_number(columns = "mean_tags_per_species", decimals = 1)
+```
+
+
+## Data sizes
+
+Now, we can check the amount of data per project (see what we're in for!)
+
+For reference, 7,006,847,799 bytes is ~ 7 GB
+
+```{r}
+#| cache: true
+dir.create("Data/Temp")
+status <- map(
+  set_names(p$tagProjectID), 
+  \(x) tellme(x, dir = "Data/Temp",  new = TRUE)) |>
+  list_rbind(names_to = "proj_id")
+unlink("Data/Temp", recursive = TRUE)
+```
+
+So this isn't too bad, data-wise; I think we could use all projects.
+
+```{r}
+status |> 
+  mutate(Megabytes = numBytes / 1000000) |>
+  arrange(desc(numBytes)) |>
+  gt() |>
+  fmt_number(decimals = 0) 
+```
+
+
+
diff --git a/01_download.qmd → 02_download.qmd b/01_download.qmd → 02_download.qmd
diff --git a/02_range_maps.qmd → 03_range_maps.qmd b/02_range_maps.qmd → 03_range_maps.qmd
diff --git a/03_basic_filters.qmd → 04_basic_filters.qmd b/03_basic_filters.qmd → 04_basic_filters.qmd
diff --git a/XX_setup.R b/XX_setup.R
@@ -35,7 +35,7 @@ projects <- setNames(projects, projects)
 # naturecounts::nc_metadata() # Update naturecounts taxonomy lists
 
 species <- naturecounts::meta_species_taxonomy() |>
-  filter(order_taxon == "Passeriformes")
+  filter(order_taxon %in% c("Passeriformes", "Piciformes"))
 
 # ---- functions ----
 source("XX_functions.R")
diff --git a/_freeze/02_download/execute-results/html.json b/_freeze/02_download/execute-results/html.json
diff --git a/docs/00_workflow.html b/docs/00_workflow.html
@@ -7,9 +7,9 @@
 <meta name="viewport" content="width=device-width, initial-scale=1.0, user-scalable=yes">
 
 <meta name="author" content="Steffi LaZerte">
-<meta name="dcterms.date" content="2024-01-17">
+<meta name="dcterms.date" content="2024-01-24">
 
-<title>Motus tracking of urban migration stopovers - Workflow</title>
+<title>Motus tracking of urban migration stopovers - Plans</title>
 <style>
 code{white-space: pre-wrap;}
 span.smallcaps{font-variant: small-caps;}
@@ -32,7 +32,7 @@
 <script src="site_libs/quarto-search/fuse.min.js"></script>
 <script src="site_libs/quarto-search/quarto-search.js"></script>
 <meta name="quarto:offset" content="./">
-<link href="./01_download.html" rel="next">
+<link href="./01_select_projects.html" rel="next">
 <script src="site_libs/quarto-html/quarto.js"></script>
 <script src="site_libs/quarto-html/popper.min.js"></script>
 <script src="site_libs/quarto-html/tippy.umd.min.js"></script>
@@ -82,7 +82,7 @@
       <button type="button" class="quarto-btn-toggle btn" data-bs-toggle="collapse" data-bs-target=".quarto-sidebar-collapse-item" aria-controls="quarto-sidebar" aria-expanded="false" aria-label="Toggle sidebar navigation" onclick="if (window.quartoToggleHeadroom) { window.quartoToggleHeadroom(); }">
         <i class="bi bi-layout-text-sidebar-reverse"></i>
       </button>
-        <nav class="quarto-page-breadcrumbs" aria-label="breadcrumb"><ol class="breadcrumb"><li class="breadcrumb-item"><a href="./00_workflow.html">Workflow</a></li><li class="breadcrumb-item"><a href="./00_workflow.html">Workflow</a></li></ol></nav>
+        <nav class="quarto-page-breadcrumbs" aria-label="breadcrumb"><ol class="breadcrumb"><li class="breadcrumb-item"><a href="./00_workflow.html">Workflow</a></li><li class="breadcrumb-item"><a href="./00_workflow.html">Plans</a></li></ol></nav>
         <a class="flex-grow-1" role="button" data-bs-toggle="collapse" data-bs-target=".quarto-sidebar-collapse-item" aria-controls="quarto-sidebar" aria-expanded="false" aria-label="Toggle sidebar navigation" onclick="if (window.quartoToggleHeadroom) { window.quartoToggleHeadroom(); }">      
         </a>
       <button type="button" class="btn quarto-search-button" aria-label="" onclick="window.quartoOpenSearch();">
@@ -122,24 +122,30 @@
           <li class="sidebar-item">
   <div class="sidebar-item-container"> 
   <a href="./00_workflow.html" class="sidebar-item-text sidebar-link active">
- <span class="menu-text">Workflow</span></a>
+ <span class="menu-text">Plans</span></a>
+  </div>
+</li>
+          <li class="sidebar-item">
+  <div class="sidebar-item-container"> 
+  <a href="./01_select_projects.html" class="sidebar-item-text sidebar-link">
+ <span class="menu-text">Selecting Projects</span></a>
   </div>
 </li>
           <li class="sidebar-item">
   <div class="sidebar-item-container"> 
-  <a href="./01_download.html" class="sidebar-item-text sidebar-link">
+  <a href="./02_download.html" class="sidebar-item-text sidebar-link">
  <span class="menu-text">Download/Update Data</span></a>
   </div>
 </li>
           <li class="sidebar-item">
   <div class="sidebar-item-container"> 
-  <a href="./02_range_maps.html" class="sidebar-item-text sidebar-link">
+  <a href="./03_range_maps.html" class="sidebar-item-text sidebar-link">
  <span class="menu-text">Range Maps</span></a>
   </div>
 </li>
           <li class="sidebar-item">
   <div class="sidebar-item-container"> 
-  <a href="./03_basic_filters.html" class="sidebar-item-text sidebar-link">
+  <a href="./04_basic_filters.html" class="sidebar-item-text sidebar-link">
  <span class="menu-text">Basic Filtering</span></a>
   </div>
 </li>
@@ -185,9 +191,9 @@ <h2 id="toc-title">On this page</h2>
 <!-- main -->
 <main class="content" id="quarto-document-content">
 
-<header id="title-block-header" class="quarto-title-block default"><nav class="quarto-page-breadcrumbs quarto-title-breadcrumbs d-none d-lg-block" aria-label="breadcrumb"><ol class="breadcrumb"><li class="breadcrumb-item"><a href="./00_workflow.html">Workflow</a></li><li class="breadcrumb-item"><a href="./00_workflow.html">Workflow</a></li></ol></nav>
+<header id="title-block-header" class="quarto-title-block default"><nav class="quarto-page-breadcrumbs quarto-title-breadcrumbs d-none d-lg-block" aria-label="breadcrumb"><ol class="breadcrumb"><li class="breadcrumb-item"><a href="./00_workflow.html">Workflow</a></li><li class="breadcrumb-item"><a href="./00_workflow.html">Plans</a></li></ol></nav>
 <div class="quarto-title">
-<h1 class="title">Workflow</h1>
+<h1 class="title">Plans</h1>
 </div>
 
 
@@ -204,7 +210,7 @@ <h1 class="title">Workflow</h1>
     <div>
     <div class="quarto-title-meta-heading">Published</div>
     <div class="quarto-title-meta-contents">
-      <p class="date">January 17, 2024</p>
+      <p class="date">January 24, 2024</p>
     </div>
   </div>
 
@@ -623,8 +629,8 @@ <h2 class="anchored" data-anchor-id="order-of-operations">Order of operations</h
   <div class="nav-page nav-page-previous">
   </div>
   <div class="nav-page nav-page-next">
-      <a href="./01_download.html" class="pagination-link" aria-label="Download/Update Data">
-        <span class="nav-page-text">Download/Update Data</span> <i class="bi bi-arrow-right-short"></i>
+      <a href="./01_select_projects.html" class="pagination-link" aria-label="Selecting Projects">
+        <span class="nav-page-text">Selecting Projects</span> <i class="bi bi-arrow-right-short"></i>
       </a>
   </div>
 </nav>
-Original file line number
+Diff line change
@@ Expand Up / @@ -11,4 +11,5 @@ @@
     Data/
     *_files
     */02_range_maps_files/figure-html/*
-    *cache/
+    *cache/
+    *.motus