diff --git a/CHANGELOG.md b/CHANGELOG.md index 909731e..cc64f50 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -8,10 +8,17 @@ and this project adheres to [Semantic Versioning](http://semver.org/spec/v2.0.0. - Nothing yet +## [2.1.4 - Unreleased] + +- Fixed tree corruption after remove() in QT2. [#40](https://github.com/tzaeschke/tinspin-indexes/issue/40) + - Fixed tree consistency (single-entry leaf after remove) + - Fixed tree consistency (nValues) -> verify + - Fixed bug in qt2.contains() + ## [2.1.3] - 2023-11-19 ### Fixed -- Fixed QUadtreeKD2 kNN finding previously deleted entries [#37](https://github.com/tzaeschke/tinspin-indexes/issue/37) +- Fixed QuadtreeKD2 kNN finding previously deleted entries [#37](https://github.com/tzaeschke/tinspin-indexes/issue/37) ## [2.1.2] - 2023-10-31 diff --git a/src/main/java/org/tinspin/index/Index.java b/src/main/java/org/tinspin/index/Index.java index 01cba75..5bed4d7 100644 --- a/src/main/java/org/tinspin/index/Index.java +++ b/src/main/java/org/tinspin/index/Index.java @@ -41,6 +41,7 @@ public interface Index { /** * @return Collect and return some index statistics. Note that indexes are not required * to fill all fields. Also, individual indexes may use subclasses with additional fields. + * Many indexes also perform consistency checks while gathering stats. */ Stats getStats(); diff --git a/src/main/java/org/tinspin/index/qthypercube2/QNode.java b/src/main/java/org/tinspin/index/qthypercube2/QNode.java index 7564b1e..833f002 100644 --- a/src/main/java/org/tinspin/index/qthypercube2/QNode.java +++ b/src/main/java/org/tinspin/index/qthypercube2/QNode.java @@ -26,7 +26,14 @@ /** * Node class for the quadtree. - * + *

+ * A node can be in one of two modes: "directory node" and "leaf node". + * Directory nodes have an 2^dim array of "subs", where ich entry can be one of: a point, a subnode, or null. + * Leaf nodes have an array of "values" which are all points. + *

+ * A new subnode is inserted in "subs" if a slot in "subs" already contains a point. + * + * * @author ztilmann * * @param Value type. @@ -203,7 +210,11 @@ PointEntry remove(QNode parent, double[] key, int maxNodeSize, Predicate

)o).remove(this, key, maxNodeSize, pred); + PointEntry removed = ((QNode)o).remove(this, key, maxNodeSize, pred); + if (removed != null) { + checkAndMergeLeafNodesInParent(parent, maxNodeSize); + } + return removed; } else if (o instanceof PointEntry) { PointEntry e = (PointEntry) o; if (removeSub(parent, key, pos, e, maxNodeSize, pred)) { @@ -228,9 +239,7 @@ private boolean removeSub( removeValue(pos); //TODO provide threshold for re-insert // i.e. do not always merge. - if (parent != null) { - parent.checkAndMergeLeafNodes(maxNodeSize); - } + checkAndMergeLeafNodesInParent(parent, maxNodeSize); return true; } return false; @@ -273,13 +282,11 @@ PointEntry update(QNode parent, double[] keyOld, double[] keyNew, int maxN requiresReinsert[0] = false; } else { requiresReinsert[0] = true; - if (parent != null) { - parent.checkAndMergeLeafNodes(maxNodeSize); - } + checkAndMergeLeafNodesInParent(parent, maxNodeSize); } return qe; } - throw new IllegalStateException(); + return null; } for (int i = 0; i < nValues; i++) { @@ -304,16 +311,37 @@ private void updateSub(double[] keyNew, PointEntry e, QNode parent, int ma requiresReinsert[0] = true; //TODO provide threshold for re-insert //i.e. do not always merge. - if (parent != null) { - parent.checkAndMergeLeafNodes(maxNodeSize); + checkAndMergeLeafNodesInParent(parent, maxNodeSize); + } + } + + private boolean checkMergeSingleLeaf(QNode parent) { + // Merge single value into parent if possible + if (!isLeaf() || parent == null || nValues > 1) { + return false; + } + for (int i = 0; i < parent.subs.length; i++) { + if (parent.subs[i] == this) { + parent.subs[i] = values[0]; + parent.nValues++; + nValues = 0; + return true; } } + throw new IllegalStateException(); + } + + @SuppressWarnings("unchecked") + private void checkAndMergeLeafNodesInParent(QNode parent, int maxNodeSize) { + if (!checkMergeSingleLeaf(parent) && parent != null) { + parent.checkAndMergeLeafNodes(maxNodeSize); + } } @SuppressWarnings("unchecked") private void checkAndMergeLeafNodes(int maxNodeSize) { //check: We start with including all local values: nValues - int nTotal = nValues; + int nTotal = 0; for (int i = 0; i < subs.length; i++) { Object e = subs[i]; if (e instanceof QNode) { @@ -324,14 +352,16 @@ private void checkAndMergeLeafNodes(int maxNodeSize) { return; } nTotal += sub.getValueCount(); - if (nTotal > maxNodeSize) { - //too many children - return; - } + } else if (e instanceof PointEntry) { + nTotal++; } } - - //okay, let's merge + if (nTotal > maxNodeSize) { + //too many children + return; + } + + //okay, let's merge. values = new PointEntry[nTotal]; nValues = 0; for (int i = 0; i < subs.length; i++) { @@ -366,7 +396,7 @@ PointEntry getExact(double[] key, Predicate> pred) { return ((QNode)sub).getExact(key, pred); } else if (sub != null) { PointEntry e = (PointEntry) sub; - if (QUtil.isPointEqual(e.point(), key)) { + if (QUtil.isPointEqual(e.point(), key) && pred.test(e)) { return e; } } @@ -402,18 +432,23 @@ void checkNode(QStats s, QNode parent, int depth) { if (parent != null) { if (!QUtil.isNodeEnclosed(center, radius, parent.center, parent.radius*QUtil.EPS_MUL)) { + System.out.println("Outer: " + parent.radius + " " + Arrays.toString(parent.center)); + System.out.println("Child: " + radius + " " + Arrays.toString(center)); for (int d = 0; d < center.length; d++) { -// if ((centerOuter[d]+radiusOuter) / (centerEnclosed[d]+radiusEnclosed) < 0.9999999 || -// (centerOuter[d]-radiusOuter) / (centerEnclosed[d]-radiusEnclosed) > 1.0000001) { -// return false; -// } - System.out.println("Outer: " + parent.radius + " " + - Arrays.toString(parent.center)); - System.out.println("Child: " + radius + " " + Arrays.toString(center)); - System.out.println((parent.center[d]+parent.radius) + " vs " + (center[d]+radius)); - System.out.println("r=" + (parent.center[d]+parent.radius) / (center[d]+radius)); - System.out.println((parent.center[d]-parent.radius) + " vs " + (center[d]-radius)); - System.out.println("r=" + (parent.center[d]-parent.radius) / (center[d]-radius)); + double parentMax = parent.center[d] + parent.radius; + double childMax = center[d] + radius; + double parentMin = parent.center[d] - parent.radius; + double childMin = center[d] - radius; + if (parentMax < childMax) { + System.out.println("DIM: " + d); + System.out.println("max: " + parentMax + " vs " + childMax); + System.out.println(" r: " + parentMax / childMax); + } + if (parentMin > childMin) { + System.out.println("DIM: " + d); + System.out.println("min: " + parentMin + " vs " + childMin); + System.out.println(" r: " + parentMin / childMin); + } } throw new IllegalStateException(); } @@ -429,12 +464,17 @@ void checkNode(QStats s, QNode parent, int depth) { if (subs != null) { throw new IllegalStateException(); } + if (nValues < 2 && parent != null) { + // Leaf nodes (except the root) must contain at least two values. + throw new IllegalStateException(); + } } else { s.nInner++; if (subs.length != 1L< parent, int depth) { ((QNode)n).checkNode(s, this, depth+1); } else if (n != null) { s.nEntries++; + nFoundValues++; checkEntry(n); } } + if (nValues != nFoundValues) { + throw new IllegalStateException(); + } + s.histoValues[nFoundValues]++; s.histo(nSubs); } } @@ -457,10 +502,6 @@ private void checkEntry(Object o) { System.out.println("Node: " + radius + " " + Arrays.toString(center)); System.out.println("Child: " + Arrays.toString(e.point())); for (int d = 0; d < center.length; d++) { -// if ((centerOuter[d]+radiusOuter) / (centerEnclosed[d]+radiusEnclosed) < 0.9999999 || -// (centerOuter[d]-radiusOuter) / (centerEnclosed[d]-radiusEnclosed) > 1.0000001) { -// return false; -// } System.out.println("min/max for " + d); System.out.println("min: " + (center[d]-radius) + " vs " + (e.point()[d])); System.out.println("r=" + (center[d]-radius) / (e.point()[d])); diff --git a/src/main/java/org/tinspin/index/qthypercube2/QuadTreeKD2.java b/src/main/java/org/tinspin/index/qthypercube2/QuadTreeKD2.java index 018ae45..f2080bd 100644 --- a/src/main/java/org/tinspin/index/qthypercube2/QuadTreeKD2.java +++ b/src/main/java/org/tinspin/index/qthypercube2/QuadTreeKD2.java @@ -42,7 +42,7 @@ * With version 1, there were many nodes with just one data entry because the parent directory * node could not hold date entries. * With version two a directory node (which contains a hypercube array) will only - * create a subnode if a quadrant has to mhold more than one data entry. + * create a subnode if a quadrant has to hold more than one data entry. * * * @author ztilmann @@ -378,6 +378,7 @@ private void toStringTree(StringBuilderLn sb, QNode node, int depth, int posInParent) { String prefix = ".".repeat(depth); sb.append(prefix + posInParent + " d=" + depth); + sb.append(" nV=" + node.getValueCount()); sb.append(" " + Arrays.toString(node.getCenter())); sb.appendLn("/" + node.getRadius()); prefix += " "; @@ -418,7 +419,7 @@ public QStats getStats() { * Statistics container class. */ public static class QStats extends Stats { - final int[] histoValues = new int[100]; + final int[] histoValues = new int[1000]; final int[] histoSubs; static final int HISTO_MAX = (1 << 10) + 1; public QStats(int dims) { diff --git a/src/test/java/org/tinspin/index/test/PointMapTest.java b/src/test/java/org/tinspin/index/test/PointMapTest.java index 5a0da7c..224a0de 100644 --- a/src/test/java/org/tinspin/index/test/PointMapTest.java +++ b/src/test/java/org/tinspin/index/test/PointMapTest.java @@ -127,6 +127,7 @@ private void smokeTest(List data) { tree.insert(e.p, e); } // System.out.println(tree.toStringTree()); + tree.getStats(); for (Entry e : data) { assertTrue("contains(point) failed: " + e, tree.contains(e.p)); Entry e2 = tree.queryExact(e.p); diff --git a/src/test/java/org/tinspin/index/test/PointMultimapTest.java b/src/test/java/org/tinspin/index/test/PointMultimapTest.java index f8f21a0..e758e3e 100644 --- a/src/test/java/org/tinspin/index/test/PointMultimapTest.java +++ b/src/test/java/org/tinspin/index/test/PointMultimapTest.java @@ -211,6 +211,7 @@ public void testUpdate() { assertFalse(containsExact(tree, pOld, e.id)); assertTrue(containsExact(tree, e.p, e.id)); } + tree.getStats(); for (int i = 0; i < data.size(); ++i) { Entry e = data.get(i); @@ -327,6 +328,7 @@ public void testIssueKnnRemove() { for (Entry e : data) { tree.insert(e.p, e); } + tree.getStats(); // remove 1st half for (int i = 0; i < nDelete; ++i) { @@ -336,6 +338,7 @@ public void testIssueKnnRemove() { assertFalse(containsExact(tree, e.p, e.id)); assertFalse(tree.contains(e.p, e)); } + tree.getStats(); // check contains() & kNN for (int i = 0; i < nDelete; ++i) { @@ -417,4 +420,42 @@ public String toString() { return "id=" + id + ":" + Arrays.toString(p); } } + + @Test + public void testIssue0040_remove() { + double[][] data = new double[][] { + new double[]{-49.0949020385742, -2.05027413368225, 819588127, 0}, + new double[]{-49.0949020385742, -2.05027389526367, 819588127, 0}, + new double[]{-45.6938514709473, 32.9847145080566, -2056090140, 0}, + new double[]{-45.6938514709473, 32.9847145080566, -2056090140, 0}, + new double[]{-1.7595032453537, 112.097793579102, -267989921, 0}, + new double[]{-1.75950336456299, 112.097793579102, -267989921, 0}, + new double[]{45.6938438415527, 32.9847145080566, 1591613824, 0}, + new double[]{45.6938438415527, 32.9847145080566, 1591613824, 0}, + new double[]{49.0948944091797, -2.05027413368225, 14481734, 0}, + new double[]{49.0948944091797, -2.05027389526367, 14481734, 0}, + new double[]{-49.0949020385742, -2.05027413368225, 819588127, 1}, + new double[]{-49.0949020385742, -2.05027389526367, 819588127, 1}, + new double[]{-49.0949020385742, -2.05027413368225, 916603126, 0}, + }; + + PointMultimap tree = createTree(100,2); + for (int i = 0; i < data.length; i++) { + if (data[i][3] == 0) { + tree.insert(Arrays.copyOf(data[i], 2), (int)data[i][2]); + } else { + tree.remove(Arrays.copyOf(data[i], 2), (int)data[i][2]); + } + } + + assertEquals(9, tree.size()); + int n = 0; + double[] min = new double[]{-50, -3}; + double[] max = new double[]{50, 113}; + for (Index.PointIterator it = tree.query(min, max); it.hasNext(); it.next()) { + n++; + } + assertEquals(9, n); + } + }