diff --git a/DESCRIPTION b/DESCRIPTION index 0218412..8148373 100644 --- a/DESCRIPTION +++ b/DESCRIPTION @@ -14,5 +14,10 @@ Encoding: UTF-8 Roxygen: list(markdown = TRUE) RoxygenNote: 7.3.2 Suggests: - testthat (>= 3.0.0) + testthat (>= 3.0.0), + sfnetworks Config/testthat/edition: 3 +Imports: + dplyr, + sf, + sfheaders diff --git a/NAMESPACE b/NAMESPACE index 6ae9268..4ebb3b2 100644 --- a/NAMESPACE +++ b/NAMESPACE @@ -1,2 +1,3 @@ # Generated by roxygen2: do not edit by hand +export(stroke) diff --git a/R/stroke.R b/R/stroke.R new file mode 100644 index 0000000..0fd9c44 --- /dev/null +++ b/R/stroke.R @@ -0,0 +1,271 @@ +#' Identify continuous lines in a network +#' +#' Apply the Continuity in Street Network (COINS) method to identify +#' sequences of edges that form naturally continuous strokes in a network. +#' +#' @param edges An object of class \code{\link[sf]{sfc}} (or compatible), +#' including the edge geometries (should be of type LineString or +#' MultiLineString). +#' +#' @param angle_threshold Consecutive line segments can be considered part of +#' the same stroke if the internal angle they form is larger than +#' \code{angle_threshold} (in degrees). It should fall in the range +#' \eqn{0 \leq angle_threshold < 180}. +#' +#' @param attributes If \code{TRUE}, return a label for each edge, representing +#' the groups each edge belongs to. Only possible for \code{flow_mode = TRUE}. +#' +#' @param flow_mode If \code{TRUE}, line segments that belong to the same edge +#' are not split across strokes (even if they form internal angles smaller than +#' \code{angle_threshold}). +#' +#' @param from_edge Only look for the continuous strokes that include the +#' provided edges or line segments. +#' +#' @return An object of class \code{\link[sf]{sfc}} (if +#' \code{attributes = FALSE}), a vector with the same length as \code{edges} +#' otherwise. +#' +#' @export +stroke <- function(edges, angle_threshold = 0, attributes = FALSE, + flow_mode = FALSE, from_edge = NULL) { + + if (attributes && !flow_mode) { + stop("Stroke attributes can be returned only if `flow_mode = TRUE`)") + } + + if (attributes) stop("attribute mode not implemented.") + if (flow_mode) stop("flow mode not implemented.") + if (!is.null(from_edge)) stop("from_edge mode not implemented") + + edges_sfc <- to_sfc(edges) + check_geometry(edges_sfc) + + # extract CRS from the edges + crs <- sf::st_crs(edges_sfc) + + # split the edges into their constituent points + edge_pts <- sfheaders::sfc_to_df(edges_sfc) + + # find unique points ("nodes") and assign them IDs + nodes <- unique_nodes(edge_pts) + + # build array of line segments, referring to points using their IDs + segments <- to_line_segments(edge_pts, nodes) + + # build connectivity table: for each node, find intersecting line segments + links <- get_links(segments) + + # calculate interior angles between segment pairs, identify best links + best_links <- best_link(nodes, segments, links, angle_threshold) + + # verify that the best links identified fulfill input requirements + final_links <- cross_check_links(best_links, flow_mode) + + # merge line segments into strokes following the predetermined connectivity + strokes <- merge_lines(nodes, segments, final_links, from_edge) + + # add the CRS to the edges, done! + sf::st_crs(strokes) <- crs + return(strokes) +} + +#' Find unique nodes and label them with IDs +#' @noRd +unique_nodes <- function(edge_points) { + nodes <- dplyr::distinct(edge_points, x, y) + nodes$node_id <- seq_len(nrow(nodes)) + return(nodes) +} + +#' @noRd +to_line_segments <- function(points, nodes) { + # label all the points using the node IDs + points <- dplyr::left_join(points, nodes, by = c("x", "y")) + + # use the attribute `linestring_id` to identify edge start- and end-points + is_startpoint <- !duplicated(points$linestring_id) + is_endpoint <- !duplicated(points$linestring_id, fromLast = TRUE) + # we build the array of line segments, with shape (nsegements, 2) + # values are the node IDs + start <- points[!is_endpoint, "node_id"] + end <- points[!is_startpoint, "node_id"] + segments <- cbind(start, end) + return(segments) +} + +#' @noRd +get_links <- function(segments) { + nsegments <- nrow(segments) + links <- data.frame(node_id = as.vector(segments)) |> + dplyr::group_by(node_id) |> + dplyr::group_rows() |> + lapply(function(x) (x - 1) %% nsegments + 1) + return(links) +} + +#' @noRd +get_linked_segments <- function(segment_id, node_id, links) { + # find the segments connected to the given one via the given node + # 1. find all segments connected to the node + segs <- links[[node_id]] + # 2. exclude the given segment from the list + is_current_segment <- segs == segment_id + linked_segments <- segs[!is_current_segment] + return(linked_segments) +} + +#' @noRd +get_linked_nodes <- function(node_id, segment_id, segments) { + # find the node connected to the given one via the given segment(s) + # 1. get the nodes that are part of the given segment(s) + nds <- segments[segment_id, ] + # 2. flatten the array row by row (i.e. along the node dimension) + nds <- as.vector(t(nds)) + # 3. exclude the given node from the list + is_current_node <- nds %in% node_id + linked_nodes <- nds[!is_current_node] + return(linked_nodes) +} + +#' @noRd +best_link <- function(nodes, segments, links, angle_threshold = 0) { + + best_links <- array(integer(), dim = dim(segments)) + colnames(best_links) <- c("start", "end") + + angle_threshold_rad <- angle_threshold / 180 * pi # convert to radians + + for (iseg in seq_len(nrow(segments))) { + start_node <- segments[iseg, "start"] + end_node <- segments[iseg, "end"] + + # find angles formed with all segments linked at start point + linked_segs <- get_linked_segments(iseg, start_node, links) + linked_nodes <- get_linked_nodes(start_node, linked_segs, segments) + angles <- interior_angle(nodes[start_node, ], + nodes[end_node, ], + nodes[linked_nodes, ]) + best_link <- get_best_link(angles, linked_segs, angle_threshold_rad) + if (length(best_link) > 0) best_links[iseg, "start"] <- best_link + + # find angles formed with all segments linked at end point + linked_segs <- get_linked_segments(iseg, end_node, links) + linked_nodes <- get_linked_nodes(end_node, linked_segs, segments) + angles <- interior_angle(nodes[end_node, ], + nodes[start_node, ], + nodes[linked_nodes, ]) + best_link <- get_best_link(angles, linked_segs, angle_threshold_rad) + if (length(best_link) > 0) best_links[iseg, "end"] <- best_link + } + return(best_links) +} + +#' @noRd +interior_angle <- function(v, p1, p2) { + # compute convex angle between three points: + # p1--v--p2 ("v" is the vertex) + dx1 <- p1$x - v$x + dx2 <- p2$x - v$x + dy1 <- p1$y - v$y + dy2 <- p2$y - v$y + dot_product <- dx1 * dx2 + dy1 * dy2 + norm1 <- sqrt(dx1^2 + dy1^2) + norm2 <- sqrt(dx2^2 + dy2^2) + cos_theta <- dot_product / (norm1 * norm2) + angle <- acos(cos_theta) + return(angle) +} + +#' @noRd +get_best_link <- function(angles, links, angle_threshold = 0) { + if (length(angles) == 0) return(NA) + is_above_threshold <- angles > angle_threshold + is_best_link <- which.max(angles[is_above_threshold]) + best_link <- links[is_best_link] + return(best_link) +} + +#' @noRd +cross_check_links <- function(best_links, flow_mode = FALSE) { + + links <- array(integer(), dim = dim(best_links)) + colnames(links) <- c("start", "end") + + # find the best link of the best links + bl <- best_links[best_links[, "start"], ] + # we check both ends to see whether the best link is reciprocal + is_best_link <- bl == seq_len(nrow(bl)) + # if we have a match on either of the sides, we keep the link + is_reciprocal <- apply(is_best_link, 1, any) + is_reciprocal[is.na(is_reciprocal)] <- FALSE # fix for NA values + links[is_reciprocal, "start"] <- best_links[is_reciprocal, "start"] + + # exact same as above, for the other side + bl <- best_links[best_links[, "end"], ] + is_best_link <- bl == seq_len(nrow(bl)) + is_reciprocal <- apply(is_best_link, 1, any) + is_reciprocal[is.na(is_reciprocal)] <- FALSE # fix for NA values + links[is_reciprocal, "end"] <- best_links[is_reciprocal, "end"] + + return(links) +} + +#' @noRd +get_nodes <- function(node_id, segment_id, segments) { + # find the node connected to the given one via the given segment(s) + # 1. get the nodes that are part of the given segment(s) + nds <- segments[segment_id, ] + # 2. exclude the given node from the list + is_current_node <- nds == node_id + linked_nodes <- nds[!is_current_node] + return(linked_nodes) +} + +#' @noRd +to_linestring <- function(node_id, nodes) { + points <- nodes[node_id, ] + linestring <- sfheaders::sfc_linestring(points, x = "x", y = "y") + return(linestring) +} + +#' @noRd +merge_lines <- function(nodes, segments, links, from_edge = NULL) { + + is_segment_used <- array(FALSE, dim = nrow(segments)) + strokes <- sf::st_sfc() + for (iseg in seq_len(nrow(segments))) { + if (is_segment_used[iseg]) next + stroke <- c() + + point <- segments[iseg, "start"] + link <- links[iseg, "start"] + current <- iseg + is_closed_loop <- FALSE + while (TRUE) { + stroke <- c(point, stroke) + is_segment_used[current] <- TRUE + if (is.na(link) || is_closed_loop) break + point <- get_nodes(point, link, segments) + is_closed_loop <- point %in% stroke + current <- link + link <- links[current, names(point)] + } + + point <- segments[iseg, "end"] + link <- links[iseg, "end"] + current <- iseg + is_closed_loop <- FALSE + while (TRUE) { + stroke <- c(stroke, point) + is_segment_used[current] <- TRUE + if (is.na(link) || is_closed_loop) break + point <- get_nodes(point, link, segments) + is_closed_loop <- point %in% stroke + current <- link + link <- links[current, names(point)] + } + strokes <- c(strokes, to_linestring(stroke, nodes)) + } + return(strokes) +} diff --git a/R/utils.R b/R/utils.R new file mode 100644 index 0000000..fa07e72 --- /dev/null +++ b/R/utils.R @@ -0,0 +1,26 @@ +#' @noRd +to_sfc <- function(x) { + sfc <- NULL + if (inherits(x, "sfc")) { + sfc <- x + } else if (inherits(x, "sf")) { + sfc <- sf::st_geometry(x) + } else if (inherits(x, "sfnetwork")) { + sf <- sf::st_as_sf(x, "edges") + sfc <- to_sfc(sf) + } else { + sfc <- sf::st_as_sfc(x) + } + return(sfc) +} + +#' @noRd +check_geometry <- function(geometry) { + # verify that we only have linestrings + geometry_type <- sf::st_geometry_type(geometry) + is_not_linestring <- geometry_type != "LINESTRING" + if (any(is_not_linestring)) { + template <- "Edges should be of type LINESTRING (a %s is provided)." + stop(sprintf(template, geometry_type[is_not_linestring])) + } +} diff --git a/man/stroke.Rd b/man/stroke.Rd new file mode 100644 index 0000000..c771b7f --- /dev/null +++ b/man/stroke.Rd @@ -0,0 +1,43 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/stroke.R +\name{stroke} +\alias{stroke} +\title{Identify continuous lines in a network} +\usage{ +stroke( + edges, + angle_threshold = 0, + attributes = FALSE, + flow_mode = FALSE, + from_edge = NULL +) +} +\arguments{ +\item{edges}{An object of class \code{\link[sfc]{sfc}} (or compatible), +including the edge geometries (should be of type LineString or +MultiLineString).} + +\item{angle_threshold}{Consecutive line segments can be considered part of +the same stroke if the internal angle they form is larger than +\code{angle_threshold} (in degrees). It should fall in the range +\eqn{0 \leq angle_threshold < 180}.} + +\item{attributes}{If \code{TRUE}, return a label for each edge, representing +the groups each edge belongs to. Only possible for \code{flow_mode = TRUE}.} + +\item{flow_mode}{If \code{TRUE}, line segments that belong to the same edge +are not split across strokes (even if they form internal angles smaller than +\code{angle_threshold}).} + +\item{from_edge}{Only look for the continuous strokes that include the +provided edges or line segments.} +} +\value{ +An object of class \code{\link[sfc]{sfc}} (if +\code{attributes = FALSE}), a vector with the same length as \code{edges} +otherwise. +} +\description{ +Apply the Continuity in Street Network (COINS) method to identify +sequences of edges that form naturally continuous strokes in a network. +} diff --git a/tests/testthat/test-my-test.R b/tests/testthat/test-my-test.R deleted file mode 100644 index 8849056..0000000 --- a/tests/testthat/test-my-test.R +++ /dev/null @@ -1,3 +0,0 @@ -test_that("multiplication works", { - expect_equal(2 * 2, 4) -}) diff --git a/tests/testthat/test-stroke.R b/tests/testthat/test-stroke.R new file mode 100644 index 0000000..0732a61 --- /dev/null +++ b/tests/testthat/test-stroke.R @@ -0,0 +1,134 @@ +# p4 +# / +# p1 - p2 - p3 +# \ | +# p5 - p6 +p1 <- sf::st_point(c(0, 0)) +p2 <- sf::st_point(c(1, 0)) +p3 <- sf::st_point(c(2, 0)) +p4 <- sf::st_point(c(2, 1)) +p5 <- sf::st_point(c(2, -1)) +p6 <- sf::st_point(c(3, -1)) + +l1 <- sf::st_linestring(c(p1, p2)) +l2 <- sf::st_linestring(c(p2, p3)) +l3 <- sf::st_linestring(c(p2, p4)) +l4 <- sf::st_linestring(c(p2, p5)) +l5 <- sf::st_linestring(c(p5, p3)) +l6 <- sf::st_linestring(c(p5, p6)) + +test_that("a stroke is found in a very simple network", { + sfc <- sf::st_sfc(l1, l2, l3) + expected <- sf::st_sfc(sf::st_linestring(c(p1, p2, p3)), l3) + actual <- stroke(sfc) + expect_setequal(actual, expected) +}) + +test_that("sf objects can be used in input", { + sfc <- sf::st_sfc(l1, l2, l3) + expected <- sf::st_sfc(sf::st_linestring(c(p1, p2, p3)), l3) + actual <- sf::st_as_sf(sfc) |> stroke() + expect_setequal(actual, expected) +}) + +test_that("sfnetworks objects can be used in input", { + + skip_if_not_installed("sfnetworks") + + nodes <- sf::st_sfc(p1, p2, p3, p4) + edges <- sf::st_sf(from = c(1, 2, 2), + to = c(2, 3, 4), + geometry = sf::st_sfc(l1, l2, l3)) + net <- sfnetworks::sfnetwork(nodes = nodes, edges = edges, + directed = FALSE, force = TRUE) + expected <- sf::st_sfc(sf::st_linestring(c(p1, p2, p3)), l3) + actual <- stroke(net) + expect_setequal(actual, expected) +}) + +test_that("multilinestrings are not supported", { + sfc <- sf::st_sfc(c(l1, l2), l3) + expect_error(stroke(sfc), "MULTILINESTRING") +}) + +test_that("proper attributes are returned for a very simple network", { + skip(message = "attribute mode to be implemented") + sfc <- sf::st_sfc(l1, l2, l3) + expected <- as.integer(c(1, 1, 2)) + actual <- stroke(sfc, attributes = TRUE, flow_mode = TRUE) + expect_setequal(actual, expected) +}) + +test_that("two linesegments are always merged if threshold is zero", { + sfc <- sf::st_sfc(l2, l4) + expected <- sf::st_sfc(sf::st_linestring(c(p5, p2, p3))) + actual <- stroke(sfc, angle_threshold = 0) + expect_setequal(actual, expected) +}) + +test_that("a more complex network with no threshold form a stroke", { + sfc <- sf::st_sfc(l1, l4, l5, l6) + expected <- sf::st_sfc(sf::st_linestring(c(p1, p2, p5, p6)), l5) + actual <- stroke(sfc, angle_threshold = 0) + expect_setequal(actual, expected) +}) + +test_that("a more complex network with threshold does not form strokes", { + sfc <- sf::st_sfc(l1, l4, l5, l6) + expected <- sfc + actual <- stroke(sfc, angle_threshold = 150.) + expect_setequal(actual, expected) +}) + +test_that("attributes cannot be returned if not in flow mode", { + skip(message = "attribute mode to be implemented") + sfc <- sf::st_sfc(l1, l2) + expect_error(stroke(sfc, attributes = TRUE, flow_mode = FALSE), + "Stroke attributes can be returned only if `flow_mode = TRUE`)") +}) + +test_that("edges can be split if flow_mode is false", { + l1 <- sf::st_linestring(c(p1, p2, p5)) + sfc <- sf::st_sfc(l1, l2) + expected <- sf::st_sfc(sf::st_linestring(c(p1, p2, p3)), + sf::st_linestring(c(p2, p5))) + actual <- stroke(sfc, flow_mode = FALSE) + expect_setequal(actual, expected) +}) + +test_that("edges are not split if flow_mode is true", { + skip(message = "flow mode to be implemented") + l1 <- sf::st_linestring(c(p1, p2, p5)) + sfc <- sf::st_sfc(l1, l2) + expected <- sfc + actual <- stroke(sfc, flow_mode = TRUE) + expect_setequal(actual, expected) +}) + +test_that("strokes can be formed starting from a given edge", { + skip(message = "stroke from edge to be implemented") + l1 <- sf::st_linestring(c(p1, p2, p3)) + sfc <- sf::st_sfc(l1, l4, l6) + expected <- sf::st_sfc(sf::st_linestring(c(p1, p2, p5, p6))) + actual <- stroke(sfc, flow_mode = FALSE, from_edge = 3) + expect_setequal(actual, expected) +}) + +test_that("strokes can be formed starting from a given line segment", { + skip(message = "stroke from edge to be implemented") + l1 <- sf::st_linestring(c(p1, p2, p3)) + l2 <- sf::st_linestring(c(p2, p5, p6)) + sfc <- sf::st_sfc(l1, l2) + expected <- sf::st_sfc(sf::st_linestring(c(p1, p2, p5, p6))) + actual <- stroke(sfc, flow_mode = FALSE, + from_edge = sf::st_linestring(c(p5, p6))) + expect_setequal(actual, expected) +}) + +test_that("attributes can be returned if edge is specified in flow mode", { + skip(message = "flow mode to be implemented") + sfc <- sf::st_sfc(l1, l2, l5, l6) + expected <- as.integer(c(1, NA, 1, 1)) + actual <- stroke(sfc, attribute = TRUE, flow_mode = TRUE, from_edge = 3) + expect_setequal(actual, expected) +})