Desbordante · chernishev · Nov 27, 2024 · Sep 13, 2024 · Nov 2, 2024 · Sep 13, 2024
diff --git a/examples/basic/mining_ar.py b/examples/basic/mining_ar.py
@@ -27,6 +27,8 @@ def print_ars(ars):
     print('Total count of ARs:', len(ars))
     print('The first 10 ARs:')
     for ar in ars[:10]:
+        print('conf: ', end='')
+
         if ar.confidence > 0.9:
             print(COLOR_CODES['bold_green'], end='')
         elif ar.confidence > 0.3:
@@ -36,6 +38,17 @@ def print_ars(ars):
 
         print('{:1.2f}'.format(ar.confidence), 
               COLOR_CODES['default'], end='\t')
+        print('sup: ', end='')
+
+        if ar.support > 0.9:
+            print(COLOR_CODES['bold_green'], end='')
+        elif ar.support > 0.3:
+            print(COLOR_CODES['bold_yellow'], end='')
+        else:
+            print(COLOR_CODES['bold_red'], end='')
+
+        print('{:1.2f}'.format(ar.support),
+              COLOR_CODES['default'], end='\t')
         print(ar.left, '->', ar.right, )
 
 
@@ -47,7 +60,7 @@ def print_itemnames(itemnames):
 
 def scenario_tabular():
     algo = desbordante.ar.algorithms.Default()
-    algo.load_data(table=(TABLE_TABULAR, ',', True), input_format='tabular')
+    algo.load_data(table=(TABLE_TABULAR, ',', False), input_format='tabular')
     algo.execute(minconf=1)
     table = pandas.read_csv(TABLE_TABULAR, header=None)
 
@@ -60,20 +73,21 @@ def scenario_tabular():
     print("\nLet's see the first 10 association rules (ARs) that are present "
           'in the dataset with minconf=1. As no minsup is specified, '
           'the default value of minsup=0 is used.\n')
+
     print_ars(algo.get_ars())
-    print("\n['Eggs'] -> ['Milk'] with confidence 1 means that whenever eggs "
-          'are found in the receipt, milk will '
+    print("\n['Butter'] -> ['Bread'] with confidence 1 means that whenever butter "
+          'is found in the receipt, bread will '
           f'{COLOR_CODES["green"]}always{COLOR_CODES["default"]} '
           'be present as well. The same holds true for all other rules with '
           f'{COLOR_CODES["bold_green"]}confidence 1{COLOR_CODES["default"]}.')
 
     print("\n\nNow, let's examine the same dataset with "
-          f'{COLOR_CODES["yellow"]}minconf=0.7{COLOR_CODES["default"]}.')
-    algo.execute(minconf=0.7)
+          f'{COLOR_CODES["yellow"]}minconf=0.6{COLOR_CODES["default"]}.')
+    algo.execute(minconf=0.6)
     print_ars(algo.get_ars())
-    print("\n['Milk'] -> ['Eggs'] with confidence 0.75 means that when milk "
+    print("\n['Yogurt'] -> ['Eggs'] with confidence 0.67 means that when yogurt "
           'is found in the receipt, the chance of eggs being '
-          'present amounts to 75%. So, customers are '
+          'present amounts to 67%. So, customers are '
           f'{COLOR_CODES["bold_yellow"]}likely{COLOR_CODES["default"]} '
-          'to buy eggs with milk.')
+          'to buy eggs with yogurt.')
 
@@ -89,24 +103,21 @@ def scenario_tabular():
           'Since the default support value is 0, the system discovers '
           'all association rules, even those that only occur once '
           "in the dataset. Now, let's see the results with "
-          f'{COLOR_CODES["yellow"]}minsup=0.5{COLOR_CODES["default"]} and '
-          f'{COLOR_CODES["yellow"]}minconf=0.5{COLOR_CODES["default"]}.\n')
-    algo.execute(minsup=0.5, minconf=0.5)
+          f'{COLOR_CODES["yellow"]}minsup=0.4{COLOR_CODES["default"]} and '
+          f'{COLOR_CODES["yellow"]}minconf=0.6{COLOR_CODES["default"]}.\n')
+    algo.execute(minsup=0.4, minconf=0.6)
     print_ars(algo.get_ars())
-    print('\nNow you can see that the number of association rules have decreased '
+    print('\nNow you can see that the number of association rules has decreased '
-          'significantly. This happened due to minsup being set to 0.5. '
-          'Unfortunately, if you want to know what the support value is for a '
-          'particular association rule in a dataset, '
-          "you can't get it with Desbordante.\n"
+          'significantly. This happened due to minsup being set to 0.4. \n'
           '\nA typical approach to controlling the algorithm is to employ '
           f'{COLOR_CODES["bold_yellow"]}"usefulness"{COLOR_CODES["default"]}, '
           'which is defined as confidence * support. '
           'In the last example, we set up min '
-          '"usefulness" = 0.5 * 0.5 = 0.25. \n\nNow, let\'s try with '
-          f'{COLOR_CODES["green"]}minsup=0.7{COLOR_CODES["default"]}, '
-          f'{COLOR_CODES["green"]}minconf=0.7{COLOR_CODES["default"]} and '
-          f'{COLOR_CODES["bold_yellow"]}"usefulness"=0.49{COLOR_CODES["default"]}.\n')
-    algo.execute(minsup=0.7, minconf=0.7)
+          '"usefulness" = 0.6 * 0.4 = 0.24. \n\nNow, let\'s try with '
+          f'{COLOR_CODES["green"]}minsup=0.6{COLOR_CODES["default"]}, '
+          f'{COLOR_CODES["green"]}minconf=0.6{COLOR_CODES["default"]} and '
+          f'{COLOR_CODES["bold_yellow"]}"usefulness"=0.36{COLOR_CODES["default"]}.\n')
+    algo.execute(minsup=0.6, minconf=0.6)
     print_ars(algo.get_ars())
     print('\nSo, now the total number of returned association rules '
           'is only four. We reduced the amount of "noisy" information '
@@ -117,7 +128,7 @@ def scenario_tabular():
 
 def scenario_singular():
     algo = desbordante.ar.algorithms.Default()
-    algo.load_data(table=(TABLE_SINGULAR, ',', True), input_format='singular')
+    algo.load_data(table=(TABLE_SINGULAR, ',', False), input_format='singular')
     algo.execute()
     table = pandas.read_csv(TABLE_SINGULAR, header=None, index_col=0)
 

diff --git a/src/core/algorithms/association_rules/ar.h b/src/core/algorithms/association_rules/ar.h
@@ -11,11 +11,16 @@ struct ArIDs {
     std::vector<unsigned> left;   // antecedent
     std::vector<unsigned> right;  // consequent
     double confidence = -1;
+    double support = -1;
 
     ArIDs() = default;
 
-    ArIDs(std::vector<unsigned> left, std::vector<unsigned> right, double confidence)
-        : left(std::move(left)), right(std::move(right)), confidence(confidence) {}
+    ArIDs(std::vector<unsigned> left, std::vector<unsigned> right, double confidence,
+          double support)
+        : left(std::move(left)),
+          right(std::move(right)),
+          confidence(confidence),
+          support(support) {}
 
     ArIDs(ArIDs const& other) = default;
     ArIDs& operator=(ArIDs const& other) = default;
@@ -27,14 +32,19 @@ struct ARStrings {
     std::list<std::string> left;   // antecedent
     std::list<std::string> right;  // consequent
     double confidence = -1;
+    double support = -1;
 
     ARStrings() = default;
 
-    ARStrings(std::list<std::string> left, std::list<std::string> right, double confidence)
-        : left(std::move(left)), right(std::move(right)), confidence(confidence) {}
+    ARStrings(std::list<std::string> left, std::list<std::string> right, double confidence,
+              double support)
+        : left(std::move(left)),
+          right(std::move(right)),
+          confidence(confidence),
+          support(support) {}
 
     ARStrings(ArIDs const& id_format_rule, TransactionalData const* transactional_data)
-        : confidence(id_format_rule.confidence) {
+        : confidence(id_format_rule.confidence), support(id_format_rule.support) {
         std::vector<std::string> const& item_names_map = transactional_data->GetItemUniverse();
 
         for (auto item_id : id_format_rule.left) {
@@ -52,7 +62,10 @@ struct ARStrings {
 
     std::string ToString() const {
         std::string result;
+        result.append("conf: ");
         result.append(std::to_string(confidence));
+        result.append("\tsup: ");
+        result.append(std::to_string(support));
         result.append("\t{");
         for (auto const& item_name : left) {
             result.append(item_name);

diff --git a/src/core/algorithms/association_rules/ar_algorithm.cpp b/src/core/algorithms/association_rules/ar_algorithm.cpp
@@ -87,8 +87,8 @@ void ARAlgorithm::GenerateRulesFrom(std::vector<unsigned> const& frequent_itemse
         auto const lhs_support = GetSupport(lhs);
         auto const confidence = support / lhs_support;
         if (confidence >= minconf_) {
-            auto const& new_ar =
-                    ar_collection_.emplace_back(std::move(lhs), std::move(rhs), confidence);
+            auto const& new_ar = ar_collection_.emplace_back(std::move(lhs), std::move(rhs),
+                                                             confidence, support);
             root_.children.emplace_back(new_ar);
         }
     }
@@ -146,8 +146,8 @@ bool ARAlgorithm::MergeRules(std::vector<unsigned> const& frequent_itemset, doub
             auto const lhs_support = GetSupport(lhs);
             auto const confidence = support / lhs_support;
             if (confidence >= minconf_) {
-                auto const& new_ar =
-                        ar_collection_.emplace_back(std::move(lhs), std::move(rhs), confidence);
+                auto const& new_ar = ar_collection_.emplace_back(std::move(lhs), std::move(rhs),
+                                                                 confidence, support);
                 child_iter->children.emplace_back(new_ar);
                 is_rule_produced = true;
             }

diff --git a/src/python_bindings/ar/bind_ar.cpp b/src/python_bindings/ar/bind_ar.cpp
@@ -22,12 +22,14 @@ void BindAr(py::module_& main_module) {
             .def("__str__", &ARStrings::ToString)
             .def_readonly("left", &ARStrings::left)
             .def_readonly("right", &ARStrings::right)
-            .def_readonly("confidence", &ARStrings::confidence);
+            .def_readonly("confidence", &ARStrings::confidence)
+            .def_readonly("support", &ARStrings::support);
 
     py::class_<ArIDs>(ar_module, "ArIDs")
             .def_readonly("left", &ArIDs::left)
             .def_readonly("right", &ArIDs::right)
-            .def_readonly("confidence", &ArIDs::confidence);
+            .def_readonly("confidence", &ArIDs::confidence)
+            .def_readonly("support", &ArIDs::support);
 
     py::class_<ARAlgorithm, Algorithm>(ar_module, "ArAlgorithm")
             .def("get_ars", &ARAlgorithm::GetArStringsList, py::return_value_policy::move)

diff --git a/src/tests/all_csv_configs.cpp b/src/tests/all_csv_configs.cpp
@@ -72,6 +72,8 @@ CSVConfig const kTestZeros = CreateCsvConfig("TestZeros.csv", ',', true);
 CSVConfig const kNullEmpty = CreateCsvConfig("NullEmpty.csv", ',', true);
 CSVConfig const kSimpleTypes = CreateCsvConfig("SimpleTypes.csv", ',', true);
 CSVConfig const kRulesBook = CreateCsvConfig("transactional_data/rules-book.csv", ',', false);
+CSVConfig const kRulesBookRows =
+        CreateCsvConfig("transactional_data/rules-book-rows.csv", ',', false);
 CSVConfig const kRulesPresentationExtended =
         CreateCsvConfig("transactional_data/rules-presentation-extended.csv", ',', false);
 CSVConfig const kRulesPresentation =

diff --git a/src/tests/all_csv_configs.h b/src/tests/all_csv_configs.h
@@ -64,6 +64,7 @@ extern CSVConfig const kTestZeros;
 extern CSVConfig const kNullEmpty;
 extern CSVConfig const kSimpleTypes;
 extern CSVConfig const kRulesBook;
+extern CSVConfig const kRulesBookRows;
 extern CSVConfig const kRulesPresentationExtended;
 extern CSVConfig const kRulesPresentation;
 extern CSVConfig const kRulesSynthetic2;

diff --git a/src/tests/test_apriori.cpp b/src/tests/test_apriori.cpp
@@ -39,6 +39,26 @@ void CheckAssociationRulesListsEquality(
     SUCCEED();
 }
 
+void CheckSupportAndConfidence(std::list<model::ARStrings> const& actual,
+                               std::set<std::string> const& lhs, std::set<std::string> const& rhs,
+                               double expected_support, double expected_confidence) {
+    for (auto const& rule : actual) {
+        std::set<std::string> actual_lhs(rule.left.begin(), rule.left.end());
+        std::set<std::string> actual_rhs(rule.right.begin(), rule.right.end());
+
+        if (lhs == actual_lhs && rhs == actual_rhs) {
+            EXPECT_DOUBLE_EQ(rule.support, expected_support)
+                    << "supports don't match: expected " << expected_support
+                    << ", got: " << rule.support;
+            EXPECT_DOUBLE_EQ(rule.confidence, expected_confidence)
+                    << "confidences don't match: expected " << expected_confidence
+                    << ", got: " << rule.confidence;
+            return;
+        }
+    }
+    ADD_FAILURE() << "expected rule not found in generated rules";
+}
+
 static std::set<std::pair<std::set<std::string>, std::set<std::string>>> ToSet(
         std::list<model::ARStrings> const& rules) {
     std::set<std::pair<std::set<std::string>, std::set<std::string>>> set;
@@ -318,4 +338,28 @@ TEST_F(ARAlgorithmTest, RepeatedExecutionConsistentResult) {
     }
 }
 
+TEST_F(ARAlgorithmTest, SupportAndConfidenceSingular) {
+    auto algorithm = CreateAlgorithmInstance(kRulesBook, 0.2, 0.5, 0, 1);
+    algorithm->Execute();
+    auto result = algorithm->GetArStringsList();
+    CheckSupportAndConfidence(result, {"Eggs"}, {"Milk"}, 0.6, 1);
+    CheckSupportAndConfidence(result, {"Bread"}, {"Eggs"}, 0.2, 0.5);
+    CheckSupportAndConfidence(result, {"Yogurt"}, {"Milk"}, 0.6, 1);
+    CheckSupportAndConfidence(result, {"Bread"}, {"Milk"}, 0.4, 1);
+    CheckSupportAndConfidence(result, {"Cheese"}, {"Milk"}, 0.4, 1);
+    CheckSupportAndConfidence(result, {"Milk", "Bread"}, {"Eggs"}, 0.2, 0.5);
+}
+
+TEST_F(ARAlgorithmTest, SupportAndConfidenceTabular) {
+    auto algorithm = CreateAlgorithmInstance(kRulesBookRows, 0.2, 0.5, false);
+    algorithm->Execute();
+    auto result = algorithm->GetArStringsList();
+    CheckSupportAndConfidence(result, {"Eggs"}, {"Milk"}, 0.6, 1);
+    CheckSupportAndConfidence(result, {"Bread"}, {"Eggs"}, 0.2, 0.5);
+    CheckSupportAndConfidence(result, {"Yogurt"}, {"Milk"}, 0.6, 1);
+    CheckSupportAndConfidence(result, {"Bread"}, {"Milk"}, 0.4, 1);
+    CheckSupportAndConfidence(result, {"Cheese"}, {"Milk"}, 0.4, 1);
+    CheckSupportAndConfidence(result, {"Milk", "Bread"}, {"Eggs"}, 0.2, 0.5);
+}
+
 }  // namespace tests