Skip to content

Commit

Permalink
substitute TODO in version matching issue with implementation
Browse files Browse the repository at this point in the history
  • Loading branch information
kenoir committed Sep 16, 2024
1 parent 395b654 commit b42e473
Show file tree
Hide file tree
Showing 3 changed files with 20 additions and 21 deletions.
Original file line number Diff line number Diff line change
Expand Up @@ -54,9 +54,6 @@ class WorkMatcher(
matcherResult <-
if (updatedNodes.isEmpty) {
val result = MatcherResult(

// TODO: There could be an issue here - the merger relies on the versions reported here to be correct
// TODO: AAAAND the versions passed in and stored in the identified index may be higher than those in the graph store
works = toMatchedIdentifiers(afterNodes),
createdTime = Instant.now()
)
Expand Down
Original file line number Diff line number Diff line change
@@ -1,5 +1,6 @@
package weco.pipeline.merger.services

import grizzled.slf4j.Logging
import weco.catalogue.internal_model.work.WorkState.Identified
import weco.catalogue.internal_model.work.Work
import weco.pipeline.matcher.models.WorkIdentifier
Expand All @@ -9,7 +10,7 @@ import scala.concurrent.{ExecutionContext, Future}

class IdentifiedWorkLookup(retriever: Retriever[Work[Identified]])(
implicit ec: ExecutionContext
) {
) extends Logging {
def fetchAllWorks(
workIdentifiers: Seq[WorkIdentifier]
): Future[Seq[Option[Work[Identified]]]] = {
Expand All @@ -28,14 +29,21 @@ class IdentifiedWorkLookup(retriever: Retriever[Work[Identified]])(
.map {
case WorkIdentifier(id, version) =>
val work = works(id.toString)
// We only want to get the exact versions of the works specified
// by the matcher.
//
// e.g. if the matcher said "combine Av1 and Bv2", and we look
// in the retriever and find {Av2, Bv3}, we shouldn't merge
// these -- we should wait for the matcher to confirm we should
// still be merging these two works.
if (work.version == version) Some(work) else None
// Being asked to merge non-matching versions is incorrect
// but it is possible for the version in the identified index
// to be higher than the version in the graph store.
// Choosing between:
// (a) a complete failure where no works are merged
// (b) a partial failure where only the works with matching versions are merged
// (c) a partial success where all works are merged, accepting the risk of inconsistency
// We choose (c) as the least disruptive option.
if (work.version != version) {
warn(
"Matching version inconsistent! " +
s"Found work $work with version $version, expected ${work.version}"
)
}
Some(work)
}
case RetrieverMultiResult(_, notFound) =>
throw new RuntimeException(s"Works not found: $notFound")
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -41,7 +41,7 @@ class IdentifiedWorkLookupTest
}
}

it("returns None if the stored version has a higher version") {
it("returns Some even if the stored version has a higher version") {
val oldWork = identifiedWork()
val newWork = oldWork.withVersion(oldWork.version + 1)

Expand All @@ -50,7 +50,7 @@ class IdentifiedWorkLookupTest
)

whenReady(fetchAllWorks(retriever = retriever, oldWork)) {
_ shouldBe Seq(None)
_ shouldBe Seq(Some(newWork))
}
}

Expand All @@ -72,14 +72,8 @@ class IdentifiedWorkLookupTest
}: _*)
)

val expectedLookupResult =
unchangedWorks.map { Some(_) } ++ (4 to 5).map {
_ =>
None
}

whenReady(fetchAllWorks(retriever = retriever, lookupWorks: _*)) {
_ shouldBe expectedLookupResult
_ shouldBe storedWorks.map(Some(_))
}
}

Expand Down

0 comments on commit b42e473

Please sign in to comment.