1 Import files

1.1 PSE data

#'* Import the cleaned NuSEDS data matched with the cuid and streamid of the PSE *
#' The most recent file matching the pattern is read from the archive folder
#' (the file actually picked is reported in the output line below).
nuseds_cuid_streamid <- import_mostRecent_file_fun(
  wd = paste0(wd_output, "/archive"),
  pattern = "2_nuseds_cuid_streamid"
)
## [1] "File imported: 2_nuseds_cuid_streamid_2026-03-12.csv ; Date modified: 2026-03-12 16:44:19.196857"
# head(nuseds_cuid_streamid)

# Tag every NuSEDS row with the identifier of its data source
nuseds_cuid_streamid[["source_id"]] <- "NuSEDS_20251103"

#'* Import the NuSEDS field definitions *
nuseds_fields_definitions <- nuseds_fields_definitions_fun(
  wd_references = wd_references_dropbox
)
#'* Import the shape files for the Region boundaries *
#' Note that se_boundary/se_boundary.shp is used in 0_assign-regions.R. This is a
#' lighter shape file but contains the same regions.
#' NOTE(review): the original note said the code here works with
#' se_boundary.shp, but the call below reads se_boundary_regions_simple.shp;
#' confirm which file is intended.
# regions_shp <- st_read(paste0(wd_pop_indic_data_gis_dropbox,"/se_boundary_regions/se_boundary_regions.shp")) %>%
#   st_transform(crs = 4269)
# unique(regions_shp$regionname)

# Folder holding the PSE region shape files, derived from the projects path
wd_geo_regions <- gsub(
  pattern = "1_PROJECTS",
  replacement = "5_DATA/Mapping/study areas/shapefiles/PSE_regions/",
  x = wd_X_Drive1_PROJECTS
)

# Read the simplified region polygons, then reproject them to CRS 4269
regions_shp <- st_transform(
  st_read(paste0(wd_geo_regions, "se_boundary_regions_simple/se_boundary_regions_simple.shp")),
  crs = 4269
)
## Reading layer `se_boundary_regions_simple' from data source 
##   `C:\Users\bcarturan\Salmon Watersheds Dropbox\Bruno Carturan\X Drive\5_DATA\Mapping\study areas\shapefiles\PSE_regions\se_boundary_regions_simple\se_boundary_regions_simple.shp' 
##   using driver `ESRI Shapefile'
## Simple feature collection with 9 features and 4 fields
## Geometry type: MULTIPOLYGON
## Dimension:     XY
## Bounding box:  xmin: 273611.402 ymin: 368824.74 xmax: 1870608.39 ymax: 1907824.79
## Projected CRS: NAD83 / BC Albers
# unique(regions_shp$Region)

# Switch off s2 so that st_intersects() and st_simplify() can be used
sf_use_s2(FALSE)

#'* Import the geodatabase for the CU boundaries  *
# Folder holding the CU geodatabase, derived from the projects path
wd_geo_cus <- gsub(
  pattern = "1_PROJECTS",
  replacement = "5_DATA/CUs_Master/GDB/",
  x = wd_X_Drive1_PROJECTS
)

# No layer is specified: as the output below reports, the geodatabase holds
# several layers and `PSF_CUs_updatedDec2021` is the one that gets read.
# The result is then reprojected to CRS 4269.
CUs_gdb <- st_transform(
  st_read(paste0(wd_geo_cus, "PSF_CUs_Master.gdb")),
  crs = 4269
)
## Multiple layers are present in data source C:\Users\bcarturan\Salmon Watersheds Dropbox\Bruno Carturan\X Drive\5_DATA\CUs_Master\GDB\PSF_CUs_Master.gdb, reading layer `PSF_CUs_updatedDec2021'.
## Use `st_layers' to list all layer names and their type in a data source.
## Set the `layer' argument in `st_read' to read a particular layer.
## Reading layer `PSF_CUs_updatedDec2021' from data source 
##   `C:\Users\bcarturan\Salmon Watersheds Dropbox\Bruno Carturan\X Drive\5_DATA\CUs_Master\GDB\PSF_CUs_Master.gdb' 
##   using driver `OpenFileGDB'
## Simple feature collection with 480 features and 18 fields
## Geometry type: MULTIPOLYGON
## Dimension:     XY
## Bounding box:  xmin: -141.001678 ymin: 48.2968085 xmax: -118.162522 ymax: 68.7059621
## Geodetic CRS:  NAD83
# Flag the CUs whose region is used (i.e. drop the non-used region values)
cond <- !(CUs_gdb$region_name %in% c("Not Assessed", "Arctic/Mackenzie"))
# Keep the flagged CUs and simplify their geometries to reduce computation time
CUs_gdb <- st_simplify(x = CUs_gdb[cond, ], dTolerance = 0.002)

1.2 SFU data

#'* Import the SFU data: escapement *
# Read the "Estimates" sheet of the SFU workbook and convert it to a plain
# data frame
SFU_escap <- as.data.frame(
  read_excel(
    paste(wd_data_dropbox, "SFU_Escapement_data_Master_PSF.xlsx", sep = "/"),
    sheet = "Estimates"
  )
)

head(SFU_escap)
##   reynolds.me      nuseds.me   watershed.code year species estimate.number
## 1         Ada ADA COVE CREEK 915-213000-71100 2024    chum             810
## 2         Ada ADA COVE CREEK 915-213000-71100 2024    coho              NA
## 3         Ada ADA COVE CREEK 915-213000-71100 2024    pink              NA
## 4         Ada ADA COVE CREEK 915-213000-71100 2024 sockeye              NA
## 5         Ada ADA COVE CREEK 915-213000-71100 2023    chum             330
## 6         Ada ADA COVE CREEK 915-213000-71100 2023    coho               8
##   estimate.source      estimate.method
## 1          NuSEDS Area Under the Curve
## 2            <NA>                 <NA>
## 3            <NA>                 <NA>
## 4            <NA>                 <NA>
## 5          NuSEDS Area Under the Curve
## 6          NuSEDS       Expert Opinion
# nrow(SFU_escap) # 3853

# Tag every SFU escapement row with the identifier of its data source
SFU_escap[["source_id"]] <- "Reynolds_20260209"

#'* Import the SFU data: stream info *
#' Previous file: SFU_Escapement_PSF.xlsx
# Read the "Coordinates" sheet of the same workbook and convert it to a plain
# data frame
SFU_stream <- as.data.frame(
  read_excel(
    paste(wd_data_dropbox, "SFU_Escapement_data_Master_PSF.xlsx", sep = "/"),
    sheet = "Coordinates"
  )
)

head(SFU_stream)
##   SFU Stream Name             Nuseds Names Area         Watershed Code
## 1             Ada           ADA COVE CREEK    7       915-213000-71100
## 2          Beales     BEALE'S LAGOON CREEK    7 915-233500-13600-03100
## 3    Bullock Main BULLOCK CHANNEL #2 CREEK    7       915-381700-55200
## 4  Bullock Square                     <NA>    7       915-381700-62200
## 5         Cheenis            CHAMISS CREEK    7             910-448100
## 6          Clatse             CLATSE CREEK    7             910-385700
##   Latitude_Longitude (River Mouth) UTM Easting UTM Northing
## 1       52°03'23.1"N 128°03'04.9"W      565099      5767640
## 2       52°10'59.5"N 127°58'57.3"W      569573      5782313
## 3       52°24'09.1"N 128°04'37.4"W      562791      5806212
## 4       52°21'40.0"N 128°03'52.1"W      563598      5801796
## 5       52°29'16.0"N 128°06'28.9"W      560566      5815666
## 6       52°20'22.1"N 127°50'26.6"W      579193      5798966

Some checks and modifications:

# Drop the rows without an estimate: either a real NA or the text "NA"
cond <- SFU_escap$estimate.number %in% c(NA, "NA")
SFU_escap <- SFU_escap[!cond, ]
sum(cond) # 1428
## [1] 1428
# CHECK for duplicated rows --> both checks should return FALSE
# (1) fully identical rows
any(duplicated(SFU_escap))
## [1] FALSE
# (2) rows sharing the same stream, year, species and estimate
any(duplicated(SFU_escap[c("reynolds.me", "year", "species", "estimate.number")]))
## [1] FALSE
## [1] FALSE
# CHECK that the stream names match between the two datasets
# At this stage the name column of SFU_stream is still called
# `SFU Stream Name` (with spaces). Referring to it as SFU_Stream_Name returns
# NULL, which makes both checks meaningless: all(x %in% NULL) is FALSE and
# all(NULL %in% x) is vacuously TRUE.
all(SFU_escap$reynolds.me %in% SFU_stream$`SFU Stream Name`)
## [1] FALSE
all(SFU_stream$`SFU Stream Name` %in% SFU_escap$reynolds.me)
## [1] FALSE
# Stream names present in SFU_escap but absent from SFU_stream
cond <- !(SFU_escap$reynolds.me %in% SFU_stream[["SFU Stream Name"]])
unique(SFU_escap$reynolds.me[cond])
## [1] "Beales Left"        "Beales Right"       "Fell Creek"        
## [4] "Mosquito Bay Left"  "Mosquito Bay Right"
# Stream names present in SFU_stream but absent from SFU_escap
cond <- !(SFU_stream[["SFU Stream Name"]] %in% SFU_escap$reynolds.me)
SFU_stream[["SFU Stream Name"]][cond]
## [1] "Fell"
# "Fell"

# unique(SFU_stream$SFU_Stream_Name)
# "Beales Left" & "Beales Right" --> "Beales"
# "Fell Creek" --> "Fell"
# "Mosquito Bay Left" & "Mosquito Bay Right" --> "Mosquito Bay"

# Align the escapement data with the stream table: "Fell Creek" --> "Fell"
cond <- SFU_escap$reynolds.me == "Fell Creek"
SFU_escap[["reynolds.me"]][cond] <- "Fell"

We remove the extra data for Beales Left & Right and Mosquito Bay Left & Right because these counts should be combined and the combined counts are also in the data set, as expressed in Arianne Nickels’ email: “Please note that we split both Mosquito bay and Beales into two streams, so the numbers from “Mosquito bay” are a combination of both “Mosquito bay left” and “Mosquito bay right”. This is the same for Beales. The reason is because the streams both share an estuary but then split off once you hit the tree line. DFO considers them one stream, so it might be easiest to stick to that. I thought I’d include the combined data and the split data just in case.”

# Drop the split-stream rows; only the combined "Beales" and "Mosquito Bay"
# series are kept
cond_remove <- SFU_escap$reynolds.me %in% c(
  "Beales Left", "Beales Right",
  "Mosquito Bay Left", "Mosquito Bay Right"
)
SFU_escap <- SFU_escap[!cond_remove, ]

# Both datasets should now hold exactly the same stream names
length(setdiff(SFU_escap$reynolds.me, SFU_stream[["SFU Stream Name"]])) == 0
## [1] TRUE
length(setdiff(SFU_stream[["SFU Stream Name"]], SFU_escap$reynolds.me)) == 0
## [1] TRUE

1.3 Find ESTIMATE_CLASSIFICATION

#' Find, for each value in estimate.method, the corresponding values of the
#' fields ESTIMATE_CLASSIFICATION and stream_survey_quality
# https://www.salmonexplorer.ca/methods/analytical-approach.html#data-quality

# Compare the method labels used by SFU with those used in NuSEDS
unique(SFU_escap[["estimate.method"]])
## [1] "Area Under the Curve"        "Expert Opinion"             
## [3] "Peak live + Dead"            "Peak Live + Dead"           
## [5] "Unknown Estimate Method"     "Other Estimate Method"      
## [7] "Fixed Site Census"           "Peak Live + Cumulative Dead"
unique(nuseds_cuid_streamid[["ESTIMATE_METHOD"]])
##  [1] "Fence Count"                    "Mark & Recapture: Petersen"    
##  [3] "Not Applicable"                 "Peak Live + Dead"              
##  [5] "Area Under the Curve"           "Unknown Estimate Method"       
##  [7] "Other Estimate Method"          "Peak Live * Expansion"         
##  [9] "Addition/Subtraction"           "Multiplication/Division"       
## [11] "Redd Count"                     "Sonar-ARIS"                    
## [13] "Resistivity Counter"            "Combined Methods"              
## [15] "Calibrated Time Series"         "(Peak Live+Cum Dead)*Expansion"
## [17] "Expert Opinion"                 "Peak Live + Cumulative Dead"   
## [19] "Aerial Survey"                  "Video Counter"                 
## [21] "Sonar-DIDSON"                   "Cumulative New"                
## [23] "Mark & Recapture: Bayesian"     "Mark & Recapture: Open Model"  
## [25] "Lake Expansion"
# Harmonise the capitalisation of "Peak live + Dead" with the NuSEDS label
SFU_escap$estimate.method[SFU_escap$estimate.method %in% "Peak live + Dead"] <- "Peak Live + Dead"

# SFU methods without an equivalent in NuSEDS
estimate.method_SFU <- unique(SFU_escap[["estimate.method"]])
setdiff(estimate.method_SFU, unique(nuseds_cuid_streamid[["ESTIMATE_METHOD"]]))
## [1] "Fixed Site Census"
# Create the two fields, to be filled in method by method
SFU_escap[["ESTIMATE_CLASSIFICATION"]] <- NA
SFU_escap[["stream_survey_quality"]] <- NA

# "Fixed Site Census" is the only SFU method absent from NuSEDS: show the
# concerned rows, then classify them by hand
cond <- SFU_escap$estimate.method == "Fixed Site Census"
SFU_escap[cond, ]
##      reynolds.me                   nuseds.me   watershed.code year species
## 1325 Fannie Left FANNIE COVE LEFT HAND CREEK 915-213000-68700 2022    pink
##      estimate.number estimate.source   estimate.method         source_id
## 1325             300          NuSEDS Fixed Site Census Reynolds_20260209
##      ESTIMATE_CLASSIFICATION stream_survey_quality
## 1325                      NA                    NA
#' From the metadata in SFU_Escapement_data_Master_PSF.csv:
#' "Fixed Site Census: Combining one or more raw observations into a single estimate
#' (e.g. add all daily fence observation SIL to create a single annual estimate)."
#' --> Medium-Low: Low to moderate effort (1-4 trips), known survey method --> RELATIVE ABUNDANCE (TYPE-4)
SFU_escap[["ESTIMATE_CLASSIFICATION"]][cond] <- "RELATIVE ABUNDANCE (TYPE-4)"
SFU_escap[["stream_survey_quality"]][cond] <- "Medium-Low"


# for the rest of the values:
#' For each SFU estimate method, tabulate how the NuSEDS records using the same
#' method are distributed among the values of ESTIMATE_CLASSIFICATION and
#' stream_survey_quality (most frequent value first).
#' The result is a list named by method. Each element is either a list with
#' the two frequency tables or NA when the method does not occur in NuSEDS.
#' lapply() is used instead of sapply() so that the result is always a list:
#' sapply() would simplify it to a matrix if every method had a match, and
#' methods_SFU_l[[val]]$ESTIMATE_CLASSIFICATION would then fail.
methods_SFU_l <- lapply(
  setNames(estimate.method_SFU, estimate.method_SFU),
  function(m) {
    # which() drops the NA comparisons, so missing methods cannot break if()
    rows <- which(nuseds_cuid_streamid$ESTIMATE_METHOD == m)
    if (length(rows) == 0) {
      return(NA)
    }
    list(
      ESTIMATE_CLASSIFICATION =
        rev(sort(table(nuseds_cuid_streamid$ESTIMATE_CLASSIFICATION[rows]))),
      stream_survey_quality =
        rev(sort(table(nuseds_cuid_streamid$stream_survey_quality[rows])))
    )
  }
)

# Area Under the Curve: inspect the NuSEDS frequencies, then assign the values
val <- "Area Under the Curve"
methods_SFU_l[[val]][["ESTIMATE_CLASSIFICATION"]]
## 
##           RELATIVE ABUNDANCE (TYPE-3)           RELATIVE ABUNDANCE (TYPE-4) 
##                                  5081                                  3213 
##           RELATIVE ABUNDANCE (TYPE-5)               TRUE ABUNDANCE (TYPE-1) 
##                                   643                                   313 
##               TRUE ABUNDANCE (TYPE-2)                               UNKNOWN 
##                                   278                                   208 
##             PRESENCE-ABSENCE (TYPE-6) RELATIVE: CONSTANT MULTI-YEAR METHODS 
##                                   174                                   143 
##  RELATIVE: VARYING MULTI-YEAR METHODS 
##                                     1
methods_SFU_l[[val]][["stream_survey_quality"]]
## 
##      Medium  Medium-Low         Low        High Medium-High     Unknown 
##        5081        3213         961         313         278         208
cond <- SFU_escap$estimate.method == val
# TYPE-4 / Medium-Low (second most frequent) is chosen to be conservative
SFU_escap[["ESTIMATE_CLASSIFICATION"]][cond] <- "RELATIVE ABUNDANCE (TYPE-4)"
SFU_escap[["stream_survey_quality"]][cond] <- "Medium-Low"

# Expert Opinion: the most frequent NuSEDS values are assigned
val <- "Expert Opinion"
methods_SFU_l[[val]][["ESTIMATE_CLASSIFICATION"]]
## 
##             PRESENCE-ABSENCE (TYPE-6)           RELATIVE ABUNDANCE (TYPE-5) 
##                                 20595                                  3776 
##           RELATIVE ABUNDANCE (TYPE-4)           RELATIVE ABUNDANCE (TYPE-3) 
##                                  1700                                   448 
##                               UNKNOWN               TRUE ABUNDANCE (TYPE-1) 
##                                   438                                   101 
##               TRUE ABUNDANCE (TYPE-2) RELATIVE: CONSTANT MULTI-YEAR METHODS 
##                                    31                                    24 
##  RELATIVE: VARYING MULTI-YEAR METHODS                   NO SURVEY THIS YEAR 
##                                     2                                     1 
##                                       
##                                     1
methods_SFU_l[[val]][["stream_survey_quality"]]
## 
##         Low  Medium-Low      Medium     Unknown        High Medium-High 
##       24397        1700         448         438         101          31
cond <- SFU_escap$estimate.method == val
SFU_escap[["ESTIMATE_CLASSIFICATION"]][cond] <- "PRESENCE-ABSENCE (TYPE-6)"
SFU_escap[["stream_survey_quality"]][cond] <- "Low"

# Peak Live + Dead: inspect the NuSEDS frequencies, then assign the values
val <- "Peak Live + Dead"
methods_SFU_l[[val]][["ESTIMATE_CLASSIFICATION"]]
## 
##           RELATIVE ABUNDANCE (TYPE-5)           RELATIVE ABUNDANCE (TYPE-4) 
##                                  3745                                  2523 
##             PRESENCE-ABSENCE (TYPE-6)           RELATIVE ABUNDANCE (TYPE-3) 
##                                   680                                   531 
##                               UNKNOWN               TRUE ABUNDANCE (TYPE-1) 
##                                   232                                    81 
## RELATIVE: CONSTANT MULTI-YEAR METHODS               TRUE ABUNDANCE (TYPE-2) 
##                                    63                                     7 
##                             NO SURVEY  RELATIVE: VARYING MULTI-YEAR METHODS 
##                                     3                                     1 
##                   NO SURVEY THIS YEAR 
##                                     1
methods_SFU_l[[val]][["stream_survey_quality"]]
## 
##         Low  Medium-Low      Medium     Unknown        High Medium-High 
##        4489        2523         531         232          81           7
cond <- SFU_escap$estimate.method == val
# NOTE(review): the most frequent NuSEDS values for this method are
# TYPE-5 / Low, yet TYPE-4 / Medium-Low (second most frequent) is assigned;
# confirm this is intended and document the rationale.
SFU_escap[["ESTIMATE_CLASSIFICATION"]][cond] <- "RELATIVE ABUNDANCE (TYPE-4)"
SFU_escap[["stream_survey_quality"]][cond] <- "Medium-Low"

# Unknown Estimate Method: the most frequent NuSEDS values are assigned
val <- "Unknown Estimate Method"
methods_SFU_l[[val]][["ESTIMATE_CLASSIFICATION"]]
## 
##                               UNKNOWN             PRESENCE-ABSENCE (TYPE-6) 
##                                134282                                  1575 
## RELATIVE: CONSTANT MULTI-YEAR METHODS  RELATIVE: VARYING MULTI-YEAR METHODS 
##                                   283                                   251 
##               TRUE ABUNDANCE (TYPE-1)                   NO SURVEY THIS YEAR 
##                                   243                                   140 
##           RELATIVE ABUNDANCE (TYPE-4)           RELATIVE ABUNDANCE (TYPE-5) 
##                                   132                                   109 
##           RELATIVE ABUNDANCE (TYPE-3)               TRUE ABUNDANCE (TYPE-2) 
##                                    59                                    20 
##                             NO SURVEY 
##                                     1
methods_SFU_l[[val]][["stream_survey_quality"]]
## 
##     Unknown         Low        High  Medium-Low      Medium Medium-High 
##      134282        2218         243         132          59          20
cond <- SFU_escap$estimate.method == val
SFU_escap[["ESTIMATE_CLASSIFICATION"]][cond] <- "UNKNOWN"
SFU_escap[["stream_survey_quality"]][cond] <- "Unknown"

# Other Estimate Method: the most frequent NuSEDS values are assigned
val <- "Other Estimate Method"
methods_SFU_l[[val]][["ESTIMATE_CLASSIFICATION"]]
## 
## RELATIVE ABUNDANCE (TYPE-4) RELATIVE ABUNDANCE (TYPE-3) 
##                        2074                        1553 
##   PRESENCE-ABSENCE (TYPE-6) RELATIVE ABUNDANCE (TYPE-5) 
##                         275                         251 
##     TRUE ABUNDANCE (TYPE-2)     TRUE ABUNDANCE (TYPE-1) 
##                         165                           8 
##                     UNKNOWN 
##                           7
methods_SFU_l[[val]][["stream_survey_quality"]]
## 
##  Medium-Low      Medium         Low Medium-High        High     Unknown 
##        2074        1553         526         165           8           7
cond <- SFU_escap$estimate.method == val
SFU_escap[["ESTIMATE_CLASSIFICATION"]][cond] <- "RELATIVE ABUNDANCE (TYPE-4)"
SFU_escap[["stream_survey_quality"]][cond] <- "Medium-Low"

# Peak Live + Cumulative Dead: the most frequent NuSEDS values are assigned
val <- "Peak Live + Cumulative Dead"
methods_SFU_l[[val]][["ESTIMATE_CLASSIFICATION"]]
## 
## RELATIVE ABUNDANCE (TYPE-4) RELATIVE ABUNDANCE (TYPE-3) 
##                        2772                        2050 
## RELATIVE ABUNDANCE (TYPE-5)   PRESENCE-ABSENCE (TYPE-6) 
##                         218                          74 
##     TRUE ABUNDANCE (TYPE-2)     TRUE ABUNDANCE (TYPE-1) 
##                           5                           2
methods_SFU_l[[val]][["stream_survey_quality"]]
## 
##  Medium-Low      Medium         Low Medium-High        High 
##        2772        2050         292           5           2
cond <- SFU_escap$estimate.method == val
SFU_escap[["ESTIMATE_CLASSIFICATION"]][cond] <- "RELATIVE ABUNDANCE (TYPE-4)"
SFU_escap[["stream_survey_quality"]][cond] <- "Medium-Low"

# CHECK that every row received a classification --> should be FALSE
anyNA(SFU_escap$ESTIMATE_CLASSIFICATION)
## [1] FALSE

2 Match locations

There are 23 locations to match:

##    SFU Stream Name                Nuseds Names Latitude_Longitude (River Mouth)
## 1              Ada              ADA COVE CREEK       52°03'23.1"N 128°03'04.9"W
## 2           Beales        BEALE'S LAGOON CREEK       52°10'59.5"N 127°58'57.3"W
## 3     Bullock Main    BULLOCK CHANNEL #2 CREEK       52°24'09.1"N 128°04'37.4"W
## 4   Bullock Square                        <NA>       52°21'40.0"N 128°03'52.1"W
## 5          Cheenis               CHAMISS CREEK       52°29'16.0"N 128°06'28.9"W
## 6           Clatse                CLATSE CREEK       52°20'22.1"N 127°50'26.6"W
## 7         Codville                        <NA>       52°04'38.6"N 127°51'44.5"W
## 8       Fancy Head       FANCY COVE HEAD CREEK       52°03'35.1"N 128°00'35.5"W
## 9      Fancy Right                        <NA>       52°03'40.6"N 128°00'54.9"W
## 10     Fannie Left FANNIE COVE LEFT HAND CREEK       52°02'34.8"N 128°04'03.3"W
## 11            Fell                        <NA>       52°26'01.9"N 128°04'46.2"W
## 12       Goatbushu            GOAT BUSHU CREEK       52°12'46.0"N 127°52'38.0"W
## 13        Hooknose             HOOK NOSE CREEK       52°07'25.2"N 127°50'23.3"W
## 14      Kill Creek                        <NA>       52°25'30.6"N 128°05'34.5"W
## 15    Kunsoot Main               KUNSOOT RIVER       52°08'57.3"N 128°00'33.5"W
## 16             Lee                   LEE CREEK       52°30'44.5"N 127°49'56.9"W
## 17    Mosquito Bay                        <NA>       52°23'51.3"N 128°10'02.7"W
## 18          Neekas                NEEKAS CREEK       52°28'06.3"N 128°09'38.2"W
## 19         Rainbow               RAINBOW CREEK       52°27'10.1"N 127°43'44.8"W
## 20           Sagar                 SAGAR CREEK       52°05'48.8"N 127°50'22.6"W
## 21    Troupe North                        <NA>       52°17'22.3"N 128°01'42.9"W
## 22    Troupe South                        <NA>       52°16'49.9"N 128°02'32.1"W
## 23         Webster          WEBSTER LAKE CREEK       52°17'02.1"N 127°59'21.9"W

2.1 Some modifications

Convert coordinates from degrees to decimals:

##    SFU Stream Name                Nuseds Names   Latitude   Longitude
## 1              Ada              ADA COVE CREEK 52.0564167 -128.051361
## 2           Beales        BEALE'S LAGOON CREEK 52.1831944 -127.982583
## 3     Bullock Main    BULLOCK CHANNEL #2 CREEK 52.4025278 -128.077056
## 4   Bullock Square                        <NA> 52.3611111 -128.064472
## 5          Cheenis               CHAMISS CREEK 52.4877778 -128.108028
## 6           Clatse                CLATSE CREEK 52.3394722 -127.840722
## 7         Codville                        <NA> 52.0773889 -127.862361
## 8       Fancy Head       FANCY COVE HEAD CREEK 52.0597500 -128.009861
## 9      Fancy Right                        <NA> 52.0612778 -128.015250
## 10     Fannie Left FANNIE COVE LEFT HAND CREEK 52.0430000 -128.067583
## 11            Fell                        <NA> 52.4338611 -128.079500
## 12       Goatbushu            GOAT BUSHU CREEK 52.2127778 -127.877222
## 13        Hooknose             HOOK NOSE CREEK 52.1236667 -127.839806
## 14      Kill Creek                        <NA> 52.4251667 -128.092917
## 15    Kunsoot Main               KUNSOOT RIVER 52.1492500 -128.009306
## 16             Lee                   LEE CREEK 52.5123611 -127.832472
## 17    Mosquito Bay                        <NA> 52.3975833 -128.167417
## 18          Neekas                NEEKAS CREEK 52.4684167 -128.160611
## 19         Rainbow               RAINBOW CREEK 52.4528056 -127.729111
## 20           Sagar                 SAGAR CREEK 52.0968889 -127.839611
## 21    Troupe North                        <NA> 52.2895278 -128.028583
## 22    Troupe South                        <NA> 52.2805278 -128.042250
## 23         Webster          WEBSTER LAKE CREEK 52.2839167 -127.989417

Add the region field using the PSE’s region shape file:

##           region SFU_Stream_Name                Nuseds_Names   Latitude
## 1  Central Coast             Ada              ADA COVE CREEK 52.0564167
## 2  Central Coast          Beales        BEALE'S LAGOON CREEK 52.1831944
## 3  Central Coast    Bullock Main    BULLOCK CHANNEL #2 CREEK 52.4025278
## 4  Central Coast  Bullock Square                        <NA> 52.3611111
## 5  Central Coast         Cheenis               CHAMISS CREEK 52.4877778
## 6  Central Coast          Clatse                CLATSE CREEK 52.3394722
## 7  Central Coast        Codville                        <NA> 52.0773889
## 8  Central Coast      Fancy Head       FANCY COVE HEAD CREEK 52.0597500
## 9  Central Coast     Fancy Right                        <NA> 52.0612778
## 10 Central Coast     Fannie Left FANNIE COVE LEFT HAND CREEK 52.0430000
## 11 Central Coast            Fell                        <NA> 52.4338611
## 12 Central Coast       Goatbushu            GOAT BUSHU CREEK 52.2127778
## 13 Central Coast        Hooknose             HOOK NOSE CREEK 52.1236667
## 14 Central Coast      Kill Creek                        <NA> 52.4251667
## 15 Central Coast    Kunsoot Main               KUNSOOT RIVER 52.1492500
## 16 Central Coast             Lee                   LEE CREEK 52.5123611
## 17 Central Coast    Mosquito Bay                        <NA> 52.3975833
## 18 Central Coast          Neekas                NEEKAS CREEK 52.4684167
## 19 Central Coast         Rainbow               RAINBOW CREEK 52.4528056
## 20 Central Coast           Sagar                 SAGAR CREEK 52.0968889
## 21 Central Coast    Troupe North                        <NA> 52.2895278
## 22 Central Coast    Troupe South                        <NA> 52.2805278
## 23 Central Coast         Webster          WEBSTER LAKE CREEK 52.2839167
##      Longitude
## 1  -128.051361
## 2  -127.982583
## 3  -128.077056
## 4  -128.064472
## 5  -128.108028
## 6  -127.840722
## 7  -127.862361
## 8  -128.009861
## 9  -128.015250
## 10 -128.067583
## 11 -128.079500
## 12 -127.877222
## 13 -127.839806
## 14 -128.092917
## 15 -128.009306
## 16 -127.832472
## 17 -128.167417
## 18 -128.160611
## 19 -127.729111
## 20 -127.839611
## 21 -128.028583
## 22 -128.042250
## 23 -127.989417

2.2 Match locations

We try to match the stream names in SFU_stream with those in the PSE.

There are 15 locations that matched:

##           region SFU_Stream_Name                   WATERBODY GFE_ID
## 1  Central Coast             Ada              ADA COVE CREEK   2683
## 2  Central Coast          Beales        BEALE'S LAGOON CREEK   1803
## 3  Central Coast    Bullock Main    BULLOCK CHANNEL #2 CREEK   2689
## 5  Central Coast         Cheenis               CHAMISS CREEK    998
## 6  Central Coast          Clatse                CLATSE CREEK    989
## 8  Central Coast      Fancy Head       FANCY COVE HEAD CREEK   2685
## 10 Central Coast     Fannie Left FANNIE COVE LEFT HAND CREEK   2687
## 12 Central Coast       Goatbushu            GOAT BUSHU CREEK    987
## 13 Central Coast        Hooknose             HOOK NOSE CREEK   1796
## 15 Central Coast    Kunsoot Main               KUNSOOT RIVER   1801
## 16 Central Coast             Lee                   LEE CREEK    992
## 18 Central Coast          Neekas                NEEKAS CREEK    999
## 19 Central Coast         Rainbow               RAINBOW CREEK    990
## 20 Central Coast           Sagar                 SAGAR CREEK   1794
## 23 Central Coast         Webster          WEBSTER LAKE CREEK   1807

There are 6 locations that did not match:

##           region SFU_Stream_Name WATERBODY GFE_ID
## 7  Central Coast        Codville      <NA>     NA
## 11 Central Coast            Fell      <NA>     NA
## 14 Central Coast      Kill Creek      <NA>     NA
## 17 Central Coast    Mosquito Bay      <NA>     NA
## 21 Central Coast    Troupe North      <NA>     NA
## 22 Central Coast    Troupe South      <NA>     NA

There are 2 locations for which we are not sure and we show here the potential matches (distance is the distance between SFU’s and DFO’s locations in km):

##   SFU_Stream_Name GFE_ID                      WATERBODY distance
## 1  Bullock Square    995         BULLOCK CHANNEL CREEKS    3.379
## 2  Bullock Square   2688     BULLOCK CHANNEL EAST CREEK    2.120
## 3  Bullock Square   2689       BULLOCK CHANNEL #2 CREEK    4.705
## 4  Bullock Square   2690 BULLOCK CHANNEL #3 NORTH CREEK    0.559
## 5  Bullock Square   2691 BULLOCK CHANNEL #3 SOUTH CREEK    4.638
## 6     Fancy Right   2684    FANCY COVE RIGHT HAND CREEK    0.199
## 7     Fancy Right   2685          FANCY COVE HEAD CREEK    0.509

To check whether any of these locations match, we look at the count data. If the counts are the same for several years then it is likely a match (the SFU points are from SFU):

##   SFU_Stream_Name Nuseds_Names Area   Watershed_Code
## 4  Bullock Square         <NA>    7 915-381700-62200
## 9     Fancy Right         <NA>    7 915-213000-72700
##   Latitude_Longitude_(River_Mouth) UTM_Easting UTM_Northing   Latitude
## 4       52°21'40.0"N 128°03'52.1"W      563598      5801796 52.3611111
## 9       52°03'40.6"N 128°00'54.9"W      567446      5768123 52.0612778
##     Longitude        region GFE_ID in_PSE    X_LONGT      Y_LAT WATERBODY
## 4 -128.064472 Central Coast     NA      ? -128.05055 52.0555371      <NA>
## 9 -128.015250 Central Coast     NA      ? -128.05055 52.0555371      <NA>

There does not seem to be any match, except maybe for Bullock Square and BULLOCK CHANNEL #3 SOUTH CREEK, but the distance between the two is > 4 km (as shown in the table above), so we decide that these are different locations and time series.

3 Merge the two datasets

Here is the procedure:

  • if IndexId/POP_ID found (so GFE_ID found too):

    • if new counts –> add them to NuSEDS

    • if counts in conflicts (different value) –> replace values in NuSEDS UNLESS THESE POINTS ARE FROM NUSEDS AS WELL, in which case we keep our data assuming our dataset is more up to date.

      UPDATE: the counts for coho and sockeye sourced from SFU were collected in September and October, which is not during peak season (source: Arianne Nickels, 2026-03-12); these counts are consequently trumped by NuSEDS counts in case the latter are available, otherwise their stream_survey_quality is downgraded

    • if counts are the same –> do nothing

    • in any case, do not change info about the location

  • if IndexId/POP_ID not found but GFE_ID found:

    • add time series to NuSEDS

    • do not change info about the location

  • if IndexId/POP_ID not found and GFE_ID not found:

    • add time series to NuSEDS

    • add info about the location

Note that it is essential to appropriately update the values of the variables associated with counts, populations and locations. The SFU data is shown with purple circles: the points with one circle were sourced from NuSEDS (by Reynolds’ team), and the points with two circles are Reynolds’ own estimates.

#' Variables whose value is deleted when editing rows of
#' nuseds_cuid_streamid_Reynolds with an existing IndexId and GFE_ID
var_toDelete <- c(
  "NATURAL_ADULT_SPAWNERS", "NATURAL_JACK_SPAWNERS", "ADULT_PRESENCE",
  "JACK_PRESENCE", "NATURAL_SPAWNERS_TOTAL", "ADULT_BROODSTOCK_REMOVALS",
  "JACK_BROODSTOCK_REMOVALS", "TOTAL_BROODSTOCK_REMOVALS", "RUN_TYPE",
  "OTHER_REMOVALS", "TOTAL_RETURN_TO_RIVER", "Returns",
  "START_DTT", "END_DTT", "NATURAL_ADULT_FEMALES",
  "NATURAL_ADULT_MALES", "EFFECTIVE_FEMALES",
  "WEIGHTED_PCT_SPAWN", "WATERBODY_ID", "STREAM_ARRIVAL_DT_FROM",
  "STREAM_ARRIVAL_DT_TO", "START_SPAWN_DT_FROM", "START_SPAWN_DT_TO",
  "PEAK_SPAWN_DT_FROM", "PEAK_SPAWN_DT_TO", "END_SPAWN_DT_FROM",
  "END_SPAWN_DT_TO", "ACCURACY", "PRECISION", "INDEX_YN",
  "RELIABILITY", "ESTIMATE_STAGE", "NO_INSPECTIONS_USED",
  "CREATED_DTT", "UPDATED_DTT", "ACT_ID", "Source", "Spawners",
  "SpawnersSource", "Broodstock", "BroodstockSource", "Removals",
  "RemovalsSource", "CMNTS", "EFFECTIVE_DT"
)

#' Variables whose value is deleted when adding rows to
#' nuseds_cuid_streamid_Reynolds with a non-existing IndexId but an existing
#' GFE_ID
var_toDelete_IndexId <- c(
  "IndexId", "POP_ID", "POPULATION", "streamid"
)

#' Variables whose value is deleted when adding rows to
#' nuseds_cuid_streamid_Reynolds with a non-existing GFE_ID
var_toDelete_GFE_ID <- c(
  "GFE_ID", "WATERSHED_CDE", "FWA_WATERSHED_CDE", "WATERBODY", "sys_nm",
  "GAZETTED_NAME", "LOCAL_NAME_1", "LOCAL_NAME_2", "CENSUS_SITE",
  "X_LONGT", "Y_LAT", "FAZ_ACRO", "MAZ_ACRO", "JAZ_ACRO",
  "AREA", "StatArea", "IS_INDICATOR", "GFE_TYPE", "coordinates_changed"
)

## [1] "The population of species sockeye at location Ada is not in NuSEDS (location is in NuSEDS)"

## [1] "The population of species coho at location Bullock Main is not in NuSEDS (location is in NuSEDS)"

## [1] "The population of species sockeye at location Bullock Main is not in NuSEDS (location is in NuSEDS)"

## [1] "Location Bullock Square is not in NuSEDS"

## [1] "Location Bullock Square is not in NuSEDS"

## [1] "Location Bullock Square is not in NuSEDS"

## [1] "Location Codville is not in NuSEDS"

## [1] "Location Codville is not in NuSEDS"

## [1] "Location Codville is not in NuSEDS"

## [1] "The population of species coho at location Fancy Head is not in NuSEDS (location is in NuSEDS)"

## [1] "Location Fancy Right is not in NuSEDS"

## [1] "Location Fancy Right is not in NuSEDS"

## [1] "Location Fancy Right is not in NuSEDS"

## [1] "Location Fancy Right is not in NuSEDS"

## [1] "Location Fell is not in NuSEDS"

## [1] "Location Fell is not in NuSEDS"

## [1] "Location Fell is not in NuSEDS"

## [1] "Location Fell is not in NuSEDS"

## [1] "The population of species sockeye at location Goatbushu is not in NuSEDS (location is in NuSEDS)"

## [1] "Location Kill Creek is not in NuSEDS"

## [1] "Location Kill Creek is not in NuSEDS"

## [1] "Location Kill Creek is not in NuSEDS"

## [1] "Location Mosquito Bay is not in NuSEDS"

## [1] "Location Mosquito Bay is not in NuSEDS"

## [1] "Location Mosquito Bay is not in NuSEDS"

## [1] "Location Mosquito Bay is not in NuSEDS"

## [1] "The population of species sockeye at location Sagar is not in NuSEDS (location is in NuSEDS)"

## [1] "Location Troupe North is not in NuSEDS"

## [1] "Location Troupe North is not in NuSEDS"

## [1] "Location Troupe North is not in NuSEDS"

## [1] "Location Troupe North is not in NuSEDS"

## [1] "Location Troupe North is not in NuSEDS"

## [1] "Location Troupe South is not in NuSEDS"

## [1] "Location Troupe South is not in NuSEDS"

## [1] "Location Troupe South is not in NuSEDS"

## [1] "Location Troupe South is not in NuSEDS"

## [1] "Location Troupe South is not in NuSEDS"

As mentioned above, we downgrade the stream_survey_quality for the counts from the SFU lab for sockeye and coho:

## [1] "BEFORE:"
##         SPECIES      ESTIMATE_METHOD         source_id stream_survey_quality
## 156783     Coho     Peak Live + Dead Reynolds_20260209            Medium-Low
## 298017  Sockeye     Peak Live + Dead Reynolds_20260209            Medium-Low
## 1162914    Coho Area Under the Curve Reynolds_20260209            Medium-Low
## [1] "AFTER:"
##         SPECIES      ESTIMATE_METHOD         source_id stream_survey_quality
## 156783     Coho     Peak Live + Dead Reynolds_20260209                   Low
## 298017  Sockeye     Peak Live + Dead Reynolds_20260209                   Low
## 1162914    Coho Area Under the Curve Reynolds_20260209                   Low

4 Export dataset

# Export the merged dataset to the archive folder, with the current date in
# the file name. Nothing is written unless export_datasets is TRUE.
if (export_datasets) {
  date <- Sys.Date()
  write.csv(
    nuseds_cuid_streamid_Reynolds,
    paste0(wd_output, "/archive/3_nuseds_cuid_streamid_Reynolds_", date, ".csv"),
    # FALSE rather than F: F is an ordinary variable that can be reassigned
    row.names = FALSE
  )
}

END