From 3d8faa089a703d26549e1a05fb36c86291ee8c0f Mon Sep 17 00:00:00 2001
From: noahnovsak <noah.novsak@gmail.com>
Date: Fri, 14 Oct 2022 13:34:08 +0200
Subject: [PATCH] Rename Interaction to InteractionScorer with descriptive attribute names

---
 orangecontrib/prototypes/interactions.py      | 21 ++++++++-------
 .../prototypes/widgets/owinteractions.py      | 20 +++++++-------
 .../widgets/tests/test_owinteractions.py      | 26 +++++++++----------
 3 files changed, 34 insertions(+), 33 deletions(-)

diff --git a/orangecontrib/prototypes/interactions.py b/orangecontrib/prototypes/interactions.py
index 1b92c31a..d1094534 100644
--- a/orangecontrib/prototypes/interactions.py
+++ b/orangecontrib/prototypes/interactions.py
@@ -41,11 +41,11 @@ def entropy(ar):
     return -np.sum(p * np.log2(p))
 
 
-class Interaction:
+class InteractionScorer:
     def __init__(self, data):
         self.data = data
-        self.class_h = 0
-        self.gains = np.zeros(data.X.shape[1])
+        self.class_entropy = 0
+        self.information_gain = np.zeros(data.X.shape[1])
 
         self.precompute()
 
@@ -61,16 +61,19 @@ def precompute(self):
         well as negative interactions with greater magnitude than the
         combined information gain.
         """
-        self.class_h = entropy(self.data.Y)
-        for attr in range(self.gains.size):
-            self.gains[attr] = self.class_h \
+        self.class_entropy = entropy(self.data.Y)
+        for attr in range(self.information_gain.size):
+            self.information_gain[attr] = self.class_entropy \
                                + entropy(self.data.X[:, attr]) \
                                - entropy(np.column_stack((self.data.X[:, attr], self.data.Y)))
 
     def __call__(self, attr1, attr2):
         attrs = np.column_stack((self.data.X[:, attr1], self.data.X[:, attr2]))
-        return self.class_h \
-               - self.gains[attr1] \
-               - self.gains[attr2] \
+        return self.class_entropy \
+               - self.information_gain[attr1] \
+               - self.information_gain[attr2] \
                + entropy(attrs) \
                - entropy(np.column_stack((attrs, self.data.Y)))
+
+    def normalize(self, score):
+        return score / self.class_entropy
diff --git a/orangecontrib/prototypes/widgets/owinteractions.py b/orangecontrib/prototypes/widgets/owinteractions.py
index 07c8e668..3baa2927 100644
--- a/orangecontrib/prototypes/widgets/owinteractions.py
+++ b/orangecontrib/prototypes/widgets/owinteractions.py
@@ -25,7 +25,7 @@
 from Orange.preprocess import Discretize, Remove
 import Orange.widgets.data.owcorrelations
 
-from orangecontrib.prototypes.interactions import Interaction
+from orangecontrib.prototypes.interactions import InteractionScorer
 
 
 SIZE_LIMIT = 1000000
@@ -149,7 +149,7 @@ class InteractionRank(Orange.widgets.data.owcorrelations.CorrelationRank):
 
 	def __init__(self, *args):
 		VizRankDialogAttrPair.__init__(self, *args)
-		self.interaction = None
+		self.scorer = None
 		self.heuristic = None
 		self.use_heuristic = False
 		self.sel_feature_index = None
@@ -175,19 +175,17 @@ def initialize(self):
 		self.use_heuristic = False
 		self.sel_feature_index = self.master.feature and data.domain.index(self.master.feature)
 		if data:
-			if self.interaction is None or self.interaction.data != data:
-				self.interaction = Interaction(data)
+			if self.scorer is None or self.scorer.data != data:
+				self.scorer = InteractionScorer(data)
 			self.use_heuristic = len(data) * len(self.attrs) ** 2 > SIZE_LIMIT
 			if self.use_heuristic and not self.sel_feature_index:
-				self.heuristic = Heuristic(self.interaction.gains, self.master.heuristic_type)
+				self.heuristic = Heuristic(self.scorer.information_gain, self.master.heuristic_type)
 
 	def compute_score(self, state):
-		attr1, attr2 = state
-		h = self.interaction.class_h
-		score = self.interaction(attr1, attr2) / h
-		gain1 = self.interaction.gains[attr1] / h
-		gain2 = self.interaction.gains[attr2] / h
-		return score, gain1, gain2
+		scores = (self.scorer(*state),
+		          self.scorer.information_gain[state[0]],
+		          self.scorer.information_gain[state[1]])
+		return tuple(self.scorer.normalize(score) for score in scores)
 
 	def row_for_state(self, score, state):
 		attrs = sorted((self.attrs[x] for x in state), key=attrgetter("name"))
diff --git a/orangecontrib/prototypes/widgets/tests/test_owinteractions.py b/orangecontrib/prototypes/widgets/tests/test_owinteractions.py
index cec96370..c854e601 100644
--- a/orangecontrib/prototypes/widgets/tests/test_owinteractions.py
+++ b/orangecontrib/prototypes/widgets/tests/test_owinteractions.py
@@ -13,7 +13,7 @@
 from Orange.widgets.widget import AttributeList
 from orangecontrib.prototypes.widgets.owinteractions import \
 	OWInteractions, Heuristic, HeuristicType, InteractionRank
-from orangecontrib.prototypes.interactions import Interaction
+from orangecontrib.prototypes.interactions import InteractionScorer
 
 
 class TestOWInteractions(WidgetTest):
@@ -276,11 +276,11 @@ def test_compute_score(self):
 		y = np.array([0, 1, 1, 1])
 		domain = Domain([DiscreteVariable(str(i)) for i in range(2)], DiscreteVariable("3"))
 		data = Table(domain, x, y)
-		self.interaction = Interaction(data)
-		npt.assert_almost_equal(self.interaction(0, 1), -0.1226, 4)
-		npt.assert_almost_equal(self.interaction.class_h, 0.8113, 4)
-		npt.assert_almost_equal(self.interaction.gains[0], 0.3113, 4)
-		npt.assert_almost_equal(self.interaction.gains[1], 0.1226, 4)
+		self.scorer = InteractionScorer(data)
+		npt.assert_almost_equal(self.scorer(0, 1), -0.1226, 4)
+		npt.assert_almost_equal(self.scorer.class_entropy, 0.8113, 4)
+		npt.assert_almost_equal(self.scorer.information_gain[0], 0.3113, 4)
+		npt.assert_almost_equal(self.scorer.information_gain[1], 0.1226, 4)
 
 	def test_nans(self):
 		"""Check score calculation with sparse data"""
@@ -288,11 +288,11 @@ def test_nans(self):
 		y = np.array([0, 1, 1, 1, 0, 0, 1])
 		domain = Domain([DiscreteVariable(str(i)) for i in range(2)], DiscreteVariable("3"))
 		data = Table(domain, x, y)
-		self.interaction = Interaction(data)
-		npt.assert_almost_equal(self.interaction(0, 1), 0.0167, 4)
-		npt.assert_almost_equal(self.interaction.class_h, 0.9852, 4)
-		npt.assert_almost_equal(self.interaction.gains[0], 0.4343, 4)
-		npt.assert_almost_equal(self.interaction.gains[1], 0.0343, 4)
+		self.scorer = InteractionScorer(data)
+		npt.assert_almost_equal(self.scorer(0, 1), 0.0167, 4)
+		npt.assert_almost_equal(self.scorer.class_entropy, 0.9852, 4)
+		npt.assert_almost_equal(self.scorer.information_gain[0], 0.4343, 4)
+		npt.assert_almost_equal(self.scorer.information_gain[1], 0.0343, 4)
 
 
 class TestHeuristic(unittest.TestCase):
@@ -302,8 +302,8 @@ def setUpClass(cls):
 
 	def test_heuristic(self):
 		"""Check attribute pairs returned by heuristic"""
-		score = Interaction(self.zoo)
-		heuristic = Heuristic(score.gains, heuristic_type=HeuristicType.INFOGAIN)
+		scorer = InteractionScorer(self.zoo)
+		heuristic = Heuristic(scorer.information_gain, heuristic_type=HeuristicType.INFOGAIN)
 		self.assertListEqual(
 			list(heuristic.get_states(None))[:9],
 			[(14, 6), (14, 10), (14, 15), (6, 10), (14, 5), (6, 15), (14, 11), (6, 5), (10, 15)]