From 3d8faa089a703d26549e1a05fb36c86291ee8c0f Mon Sep 17 00:00:00 2001 From: noahnovsak Date: Fri, 14 Oct 2022 13:34:08 +0200 Subject: [PATCH] descriptive interaction variables --- orangecontrib/prototypes/interactions.py | 21 ++++++++------- .../prototypes/widgets/owinteractions.py | 20 +++++++------- .../widgets/tests/test_owinteractions.py | 26 +++++++++---------- 3 files changed, 34 insertions(+), 33 deletions(-) diff --git a/orangecontrib/prototypes/interactions.py b/orangecontrib/prototypes/interactions.py index 1b92c31a..d1094534 100644 --- a/orangecontrib/prototypes/interactions.py +++ b/orangecontrib/prototypes/interactions.py @@ -41,11 +41,11 @@ def entropy(ar): return -np.sum(p * np.log2(p)) -class Interaction: +class InteractionScorer: def __init__(self, data): self.data = data - self.class_h = 0 - self.gains = np.zeros(data.X.shape[1]) + self.class_entropy = 0 + self.information_gain = np.zeros(data.X.shape[1]) self.precompute() @@ -61,16 +61,19 @@ def precompute(self): well as negative interactions with greater magnitude than the combined information gain. """ - self.class_h = entropy(self.data.Y) - for attr in range(self.gains.size): - self.gains[attr] = self.class_h \ + self.class_entropy = entropy(self.data.Y) + for attr in range(self.information_gain.size): + self.information_gain[attr] = self.class_entropy \ + entropy(self.data.X[:, attr]) \ - entropy(np.column_stack((self.data.X[:, attr], self.data.Y))) def __call__(self, attr1, attr2): attrs = np.column_stack((self.data.X[:, attr1], self.data.X[:, attr2])) - return self.class_h \ - - self.gains[attr1] \ - - self.gains[attr2] \ + return self.class_entropy \ + - self.information_gain[attr1] \ + - self.information_gain[attr2] \ + entropy(attrs) \ - entropy(np.column_stack((attrs, self.data.Y))) + + def normalize(self, score): + return score / self.class_entropy diff --git a/orangecontrib/prototypes/widgets/owinteractions.py b/orangecontrib/prototypes/widgets/owinteractions.py index 07c8e668..3baa2927 100644 --- a/orangecontrib/prototypes/widgets/owinteractions.py +++ b/orangecontrib/prototypes/widgets/owinteractions.py @@ -25,7 +25,7 @@ from Orange.preprocess import Discretize, Remove import Orange.widgets.data.owcorrelations -from orangecontrib.prototypes.interactions import Interaction +from orangecontrib.prototypes.interactions import InteractionScorer SIZE_LIMIT = 1000000 @@ -149,7 +149,7 @@ class InteractionRank(Orange.widgets.data.owcorrelations.CorrelationRank): def __init__(self, *args): VizRankDialogAttrPair.__init__(self, *args) - self.interaction = None + self.scorer = None self.heuristic = None self.use_heuristic = False self.sel_feature_index = None @@ -175,19 +175,17 @@ def initialize(self): self.use_heuristic = False self.sel_feature_index = self.master.feature and data.domain.index(self.master.feature) if data: - if self.interaction is None or self.interaction.data != data: - self.interaction = Interaction(data) + if self.scorer is None or self.scorer.data != data: + self.scorer = InteractionScorer(data) self.use_heuristic = len(data) * len(self.attrs) ** 2 > SIZE_LIMIT if self.use_heuristic and not self.sel_feature_index: - self.heuristic = Heuristic(self.interaction.gains, self.master.heuristic_type) + self.heuristic = Heuristic(self.scorer.information_gain, self.master.heuristic_type) def compute_score(self, state): - attr1, attr2 = state - h = self.interaction.class_h - score = self.interaction(attr1, attr2) / h - gain1 = self.interaction.gains[attr1] / h - gain2 = self.interaction.gains[attr2] / h - return score, gain1, gain2 + scores = (self.scorer(*state), + self.scorer.information_gain[state[0]], + self.scorer.information_gain[state[1]]) + return tuple(self.scorer.normalize(score) for score in scores) def row_for_state(self, score, state): attrs = sorted((self.attrs[x] for x in state), key=attrgetter("name")) diff --git a/orangecontrib/prototypes/widgets/tests/test_owinteractions.py b/orangecontrib/prototypes/widgets/tests/test_owinteractions.py index cec96370..c854e601 100644 --- a/orangecontrib/prototypes/widgets/tests/test_owinteractions.py +++ b/orangecontrib/prototypes/widgets/tests/test_owinteractions.py @@ -13,7 +13,7 @@ from Orange.widgets.widget import AttributeList from orangecontrib.prototypes.widgets.owinteractions import \ OWInteractions, Heuristic, HeuristicType, InteractionRank -from orangecontrib.prototypes.interactions import Interaction +from orangecontrib.prototypes.interactions import InteractionScorer class TestOWInteractions(WidgetTest): @@ -276,11 +276,11 @@ def test_compute_score(self): y = np.array([0, 1, 1, 1]) domain = Domain([DiscreteVariable(str(i)) for i in range(2)], DiscreteVariable("3")) data = Table(domain, x, y) - self.interaction = Interaction(data) - npt.assert_almost_equal(self.interaction(0, 1), -0.1226, 4) - npt.assert_almost_equal(self.interaction.class_h, 0.8113, 4) - npt.assert_almost_equal(self.interaction.gains[0], 0.3113, 4) - npt.assert_almost_equal(self.interaction.gains[1], 0.1226, 4) + self.scorer = InteractionScorer(data) + npt.assert_almost_equal(self.scorer(0, 1), -0.1226, 4) + npt.assert_almost_equal(self.scorer.class_entropy, 0.8113, 4) + npt.assert_almost_equal(self.scorer.information_gain[0], 0.3113, 4) + npt.assert_almost_equal(self.scorer.information_gain[1], 0.1226, 4) def test_nans(self): """Check score calculation with sparse data""" @@ -288,11 +288,11 @@ def test_nans(self): y = np.array([0, 1, 1, 1, 0, 0, 1]) domain = Domain([DiscreteVariable(str(i)) for i in range(2)], DiscreteVariable("3")) data = Table(domain, x, y) - self.interaction = Interaction(data) - npt.assert_almost_equal(self.interaction(0, 1), 0.0167, 4) - npt.assert_almost_equal(self.interaction.class_h, 0.9852, 4) - npt.assert_almost_equal(self.interaction.gains[0], 0.4343, 4) - npt.assert_almost_equal(self.interaction.gains[1], 0.0343, 4) + self.scorer = InteractionScorer(data) + npt.assert_almost_equal(self.scorer(0, 1), 0.0167, 4) + npt.assert_almost_equal(self.scorer.class_entropy, 0.9852, 4) + npt.assert_almost_equal(self.scorer.information_gain[0], 0.4343, 4) + npt.assert_almost_equal(self.scorer.information_gain[1], 0.0343, 4) class TestHeuristic(unittest.TestCase): @@ -302,8 +302,8 @@ def setUpClass(cls): def test_heuristic(self): """Check attribute pairs returned by heuristic""" - score = Interaction(self.zoo) - heuristic = Heuristic(score.gains, heuristic_type=HeuristicType.INFOGAIN) + scorer = InteractionScorer(self.zoo) + heuristic = Heuristic(scorer.information_gain, heuristic_type=HeuristicType.INFOGAIN) self.assertListEqual( list(heuristic.get_states(None))[:9], [(14, 6), (14, 10), (14, 15), (6, 10), (14, 5), (6, 15), (14, 11), (6, 5), (10, 15)]