From 5ace0aef9982942a54ad2b676b687c3eed1ee31d Mon Sep 17 00:00:00 2001 From: Tilmann Date: Thu, 13 Jun 2024 18:25:14 +0200 Subject: [PATCH] Fix for issue #40 --- CHANGELOG.md | 7 ++ src/main/java/org/tinspin/index/Index.java | 1 + .../org/tinspin/index/qthypercube2/QNode.java | 76 +++++++++---------- .../index/qthypercube2/QuadTreeKD2.java | 3 +- .../org/tinspin/index/qt2/Quadtree2Test.java | 56 +++++++++++++- .../org/tinspin/index/test/PointMapTest.java | 1 + .../tinspin/index/test/PointMultimapTest.java | 3 + 7 files changed, 103 insertions(+), 44 deletions(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index 0e01064..05f52a8 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -11,6 +11,13 @@ and this project adheres to [Semantic Versioning](http://semver.org/spec/v2.0.0. ## [2.1.4 - Unreleased] - Fixed tree corruption after remove() in QT2. [#40](https://github.com/tzaeschke/tinspin-indexes/issue/40) + - Fixed tree consistency (single-entry leaf after remove) + - Fixed tree consistency (nValues) -> verify + - Fixed bug in qt2.contains() +TODO +- CLean up calls to checkMerge() -> call leaf-merge only in leaf! +- What is going on with root expansion? Why does it not fail in tests? +- check qt0 ## [2.1.3] - 2023-11-19 diff --git a/src/main/java/org/tinspin/index/Index.java b/src/main/java/org/tinspin/index/Index.java index 01cba75..5bed4d7 100644 --- a/src/main/java/org/tinspin/index/Index.java +++ b/src/main/java/org/tinspin/index/Index.java @@ -41,6 +41,7 @@ public interface Index { /** * @return Collect and return some index statistics. Note that indexes are not required * to fill all fields. Also, individual indexes may use subclasses with additional fields. + * Many indexes also perform consistency checks while gathering stats. */ Stats getStats(); diff --git a/src/main/java/org/tinspin/index/qthypercube2/QNode.java b/src/main/java/org/tinspin/index/qthypercube2/QNode.java index 71bf065..833f002 100644 --- a/src/main/java/org/tinspin/index/qthypercube2/QNode.java +++ b/src/main/java/org/tinspin/index/qthypercube2/QNode.java @@ -211,7 +211,9 @@ PointEntry remove(QNode parent, double[] key, int maxNodeSize, Predicate

removed = ((QNode)o).remove(this, key, maxNodeSize, pred); - checkAndMergeLeafNodesInParent(parent, maxNodeSize); + if (removed != null) { + checkAndMergeLeafNodesInParent(parent, maxNodeSize); + } return removed; } else if (o instanceof PointEntry) { PointEntry e = (PointEntry) o; @@ -284,7 +286,7 @@ PointEntry update(QNode parent, double[] keyOld, double[] keyNew, int maxN } return qe; } - throw new IllegalStateException(); + return null; } for (int i = 0; i < nValues; i++) { @@ -313,9 +315,25 @@ private void updateSub(double[] keyNew, PointEntry e, QNode parent, int ma } } + private boolean checkMergeSingleLeaf(QNode parent) { + // Merge single value into parent if possible + if (!isLeaf() || parent == null || nValues > 1) { + return false; + } + for (int i = 0; i < parent.subs.length; i++) { + if (parent.subs[i] == this) { + parent.subs[i] = values[0]; + parent.nValues++; + nValues = 0; + return true; + } + } + throw new IllegalStateException(); + } + @SuppressWarnings("unchecked") private void checkAndMergeLeafNodesInParent(QNode parent, int maxNodeSize) { - if (parent != null) { + if (!checkMergeSingleLeaf(parent) && parent != null) { parent.checkAndMergeLeafNodes(maxNodeSize); } } @@ -323,8 +341,7 @@ private void checkAndMergeLeafNodesInParent(QNode parent, int maxNodeSize) { @SuppressWarnings("unchecked") private void checkAndMergeLeafNodes(int maxNodeSize) { //check: We start with including all local values: nValues - int nTotal = nValues; - int nSubs = 0; + int nTotal = 0; for (int i = 0; i < subs.length; i++) { Object e = subs[i]; if (e instanceof QNode) { @@ -335,31 +352,16 @@ private void checkAndMergeLeafNodes(int maxNodeSize) { return; } nTotal += sub.getValueCount(); - if (nTotal > maxNodeSize) { - //too many children - return; - } - nSubs++; + } else if (e instanceof PointEntry) { + nTotal++; } } - - //okay, let's merge. - // Special case: only one subnode (all subs are leaf nodes) - if (nSubs == 1) { - for (int i = 0; i < subs.length; i++) { - Object e = subs[i]; - if (e instanceof QNode) { - QNode sub = (QNode) e; - values = sub.values; - nValues = sub.nValues; - subs = null; - isLeaf = true; - return; - } - } - throw new IllegalStateException(); + if (nTotal > maxNodeSize) { + //too many children + return; } + //okay, let's merge. values = new PointEntry[nTotal]; nValues = 0; for (int i = 0; i < subs.length; i++) { @@ -394,7 +396,7 @@ PointEntry getExact(double[] key, Predicate> pred) { return ((QNode)sub).getExact(key, pred); } else if (sub != null) { PointEntry e = (PointEntry) sub; - if (QUtil.isPointEqual(e.point(), key)) { + if (QUtil.isPointEqual(e.point(), key) && pred.test(e)) { return e; } } @@ -433,10 +435,6 @@ void checkNode(QStats s, QNode parent, int depth) { System.out.println("Outer: " + parent.radius + " " + Arrays.toString(parent.center)); System.out.println("Child: " + radius + " " + Arrays.toString(center)); for (int d = 0; d < center.length; d++) { -// if ((centerOuter[d]+radiusOuter) / (centerEnclosed[d]+radiusEnclosed) < 0.9999999 || -// (centerOuter[d]-radiusOuter) / (centerEnclosed[d]-radiusEnclosed) > 1.0000001) { -// return false; -// } double parentMax = parent.center[d] + parent.radius; double childMax = center[d] + radius; double parentMin = parent.center[d] - parent.radius; @@ -451,12 +449,8 @@ void checkNode(QStats s, QNode parent, int depth) { System.out.println("min: " + parentMin + " vs " + childMin); System.out.println(" r: " + parentMin / childMin); } - // System.out.println("max " + (parent.center[d]+parent.radius) + " vs " + (center[d]+radius)); - // System.out.println(" r=" + (parent.center[d]+parent.radius) / (center[d]+radius)); - // System.out.println("min " + (parent.center[d]-parent.radius) + " vs " + (center[d]-radius)); - // System.out.println(" r=" + (parent.center[d]-parent.radius) / (center[d]-radius)); } - // throw new IllegalStateException(); // TODO reenable + throw new IllegalStateException(); } } if (values != null) { @@ -480,6 +474,7 @@ void checkNode(QStats s, QNode parent, int depth) { throw new IllegalStateException(); } int nSubs = 0; + int nFoundValues = 0; for (int i = 0; i < subs.length; i++) { Object n = subs[i]; //TODO check pos @@ -488,9 +483,14 @@ void checkNode(QStats s, QNode parent, int depth) { ((QNode)n).checkNode(s, this, depth+1); } else if (n != null) { s.nEntries++; + nFoundValues++; checkEntry(n); } } + if (nValues != nFoundValues) { + throw new IllegalStateException(); + } + s.histoValues[nFoundValues]++; s.histo(nSubs); } } @@ -502,10 +502,6 @@ private void checkEntry(Object o) { System.out.println("Node: " + radius + " " + Arrays.toString(center)); System.out.println("Child: " + Arrays.toString(e.point())); for (int d = 0; d < center.length; d++) { -// if ((centerOuter[d]+radiusOuter) / (centerEnclosed[d]+radiusEnclosed) < 0.9999999 || -// (centerOuter[d]-radiusOuter) / (centerEnclosed[d]-radiusEnclosed) > 1.0000001) { -// return false; -// } System.out.println("min/max for " + d); System.out.println("min: " + (center[d]-radius) + " vs " + (e.point()[d])); System.out.println("r=" + (center[d]-radius) / (e.point()[d])); diff --git a/src/main/java/org/tinspin/index/qthypercube2/QuadTreeKD2.java b/src/main/java/org/tinspin/index/qthypercube2/QuadTreeKD2.java index d721dc3..362fc06 100644 --- a/src/main/java/org/tinspin/index/qthypercube2/QuadTreeKD2.java +++ b/src/main/java/org/tinspin/index/qthypercube2/QuadTreeKD2.java @@ -396,6 +396,7 @@ private void toStringTree(StringBuilderLn sb, QNode node, int depth, int posInParent) { String prefix = ".".repeat(depth); sb.append(prefix + posInParent + " d=" + depth); + sb.append(" nV=" + node.getValueCount()); sb.append(" " + Arrays.toString(node.getCenter())); sb.appendLn("/" + node.getRadius()); prefix += " "; @@ -436,7 +437,7 @@ public QStats getStats() { * Statistics container class. */ public static class QStats extends Stats { - final int[] histoValues = new int[100]; + final int[] histoValues = new int[1000]; final int[] histoSubs; static final int HISTO_MAX = (1 << 10) + 1; public QStats(int dims) { diff --git a/src/test/java/org/tinspin/index/qt2/Quadtree2Test.java b/src/test/java/org/tinspin/index/qt2/Quadtree2Test.java index 20bf5e0..6c4e2a7 100644 --- a/src/test/java/org/tinspin/index/qt2/Quadtree2Test.java +++ b/src/test/java/org/tinspin/index/qt2/Quadtree2Test.java @@ -1,15 +1,35 @@ +/* + * Copyright 2009-2017 Tilmann Zaeschke. All rights reserved. + * + * This file is part of TinSpin. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ package org.tinspin.index.qt2; -import java.util.Arrays; - import org.junit.Test; import org.tinspin.index.qthypercube2.QuadTreeKD2; +import java.util.Arrays; + +import static org.junit.Assert.assertEquals; +import static org.junit.Assert.assertTrue; + public class Quadtree2Test { @Test - public void testIssue0040() { + public void testIssue0040_remove() { double[][] data = new double[][] { new double[]{-49.0949020385742, -2.05027413368225, 819588127, 0}, new double[]{-49.0949020385742, -2.05027389526367, 819588127, 0}, @@ -38,4 +58,34 @@ public void testIssue0040() { } } } + + @Test + public void testIssue0040_rootExpansion() { + double[][] data = new double[][] { + new double[]{-49.0949020385742, -2.05027413368225, 819588127, 0}, + new double[]{-49.0949020385742, -2.05027389526367, 819588127, 0}, + new double[]{-45.6938514709473, 32.9847145080566, -2056090140, 0}, + new double[]{-45.6938514709473, 32.9847145080566, -2056090140, 0}, + new double[]{-1.7595032453537, 112.097793579102, -267989921, 0}, + new double[]{-1.75950336456299, 112.097793579102, -267989921, 0}, + new double[]{45.6938438415527, 32.9847145080566, 1591613824, 0}, + new double[]{45.6938438415527, 32.9847145080566, 1591613824, 0}, + new double[]{49.0948944091797, -2.05027413368225, 14481734, 0}, + new double[]{49.0948944091797, -2.05027389526367, 14481734, 0}, + }; + + QuadTreeKD2 tree = QuadTreeKD2.create(2); + for (int i = 0; i < data.length; i++) { + tree.getStats(); + tree.insert(Arrays.copyOf(data[i], 2), (int)data[i][2]); + } + System.out.println(tree.toStringTree()); + + // root test + for (int i = 0; i < data.length; i++) { + assertTrue(tree.contains(data[i])); + assertTrue(tree.contains(data[i], (int)data[i][2])); + assertEquals( (int)data[i][2], (int) tree.queryExact(data[i])); + } + } } diff --git a/src/test/java/org/tinspin/index/test/PointMapTest.java b/src/test/java/org/tinspin/index/test/PointMapTest.java index 5a0da7c..224a0de 100644 --- a/src/test/java/org/tinspin/index/test/PointMapTest.java +++ b/src/test/java/org/tinspin/index/test/PointMapTest.java @@ -127,6 +127,7 @@ private void smokeTest(List data) { tree.insert(e.p, e); } // System.out.println(tree.toStringTree()); + tree.getStats(); for (Entry e : data) { assertTrue("contains(point) failed: " + e, tree.contains(e.p)); Entry e2 = tree.queryExact(e.p); diff --git a/src/test/java/org/tinspin/index/test/PointMultimapTest.java b/src/test/java/org/tinspin/index/test/PointMultimapTest.java index f8f21a0..025d043 100644 --- a/src/test/java/org/tinspin/index/test/PointMultimapTest.java +++ b/src/test/java/org/tinspin/index/test/PointMultimapTest.java @@ -211,6 +211,7 @@ public void testUpdate() { assertFalse(containsExact(tree, pOld, e.id)); assertTrue(containsExact(tree, e.p, e.id)); } + tree.getStats(); for (int i = 0; i < data.size(); ++i) { Entry e = data.get(i); @@ -327,6 +328,7 @@ public void testIssueKnnRemove() { for (Entry e : data) { tree.insert(e.p, e); } + tree.getStats(); // remove 1st half for (int i = 0; i < nDelete; ++i) { @@ -336,6 +338,7 @@ public void testIssueKnnRemove() { assertFalse(containsExact(tree, e.p, e.id)); assertFalse(tree.contains(e.p, e)); } + tree.getStats(); // check contains() & kNN for (int i = 0; i < nDelete; ++i) {