Refactor examples of EulerFD using

Desbordante · Jan 10, 2025 · 2a4cca3 · 2a4cca3
1 parent 3701384
commit 2a4cca3
Show file tree

Hide file tree

Showing 2 changed files with 42 additions and 29 deletions.
diff --git a/examples/advanced/comparison_mining_fd_approximate.py b/examples/advanced/comparison_mining_fd_approximate.py
@@ -2,8 +2,9 @@
 
 print('''
 =======================================================
-This example show some work features of algorithms for
-searching FD approximately: AidFD and EulerFD.
+This example demonstrates key characteristics of the
+approximate functional dependency (FD) discovery
+algorithms, AID-FD and EulerFD.
 =======================================================\n
 ''')
 
@@ -16,50 +17,54 @@
 alg = desbordante.fd.algorithms.EulerFD()
 alg.load_data(table=(TABLE, ';', False))
 
-print("EulerFD is randomized algorithm. Answer is depended on seed: ")
+print("EulerFD is a randomized algorithm, and its results vary based on the seed value. For instance:")
 
 alg.execute(custom_random_seed=SEED_76_FD)
 result76 = set(alg.get_fds())
-print(f"Seed is {SEED_76_FD}, number of FDs is {len(result76)}")
+print(f"With a seed of {SEED_76_FD}, EulerFD found {len(result76)} FDs.")
 
 alg.execute(custom_random_seed=SEED_78_FD)
 result78 = set(alg.get_fds())
-print(f"Seed is {SEED_78_FD}, number of FDs is {len(result78)}")
+print(f"With a seed of {SEED_78_FD}, EulerFD found {len(result78)} FDs.")
 
 alg.execute(custom_random_seed=SEED_80_FD)
 result80 = set(alg.get_fds())
-print(f"Seed is {SEED_80_FD}, number of FDs is {len(result80)}")
+print(f"With a seed of {SEED_80_FD}, EulerFD found {len(result80)} FDs.")
 
 exact_fd = desbordante.fd.algorithms.HyFD()
 exact_fd.load_data(table=(TABLE, ';', False))
 exact_fd.execute()
 result_exact = set(exact_fd.get_fds())
 
-print(f"Exact algorithm, number of FDs is {len(result_exact)}")
+print(f"An exact FD discovery algorithm, in contrast, consistently identified 78 FDs.")
 
-print("EulerFD can get extra answers (false FDs) or not find some FDs")
+print('''
+This highlights a key property of EulerFD: it may produce results with both
+false positives (extra FDs) and false negatives (missing FDs)
+compared to exact methods.
+''')
 
 print()
 print("---------------------------------------------------------------------")
-print("Lets look at difference between answer of exact algorithm and EulerFD")
+print("Let's examine the differences between the results of the exact algorithm and EulerFD.")
 print()
-print(f"Check first answer with seed {SEED_76_FD}, whick get as 76 FDs")
+print(f"First, consider the results with a seed of {SEED_76_FD}, where EulerFD identified {len(result76)} FDs.")
 
 diff_76 = result_exact - result76
-print(f"EulerFD not find {len(diff_76)} FD:")
+print(f"Compared to the exact method, EulerFD failed to identify the following {len(diff_76)} FDs:")
 for fd in diff_76:
     print(fd)
 
 diff_76 = result76 - result_exact
-print(f"EulerFD find {len(diff_76)} false FD:")
+print(f"Additionally, it incorrectly identified these {len(diff_76)} false FDs:")
 for fd in diff_76:
     print(fd)
 
-print("EulerFD in one answer can not find some FDs and get false FDs instead")
+print("Thus, a single run of EulerFD can both miss valid FDs and generate false FDs.")
 
 print()
-print(f"Lest check second answer with seed {SEED_78_FD}, which get as 78 FDs")
-print(f"EulerFD not find {len(result_exact - result78)} FDs.")
-print(f"EulerFD find {len(result78 - result_exact)} false FDs:")
-print(f"EulerFD get exact answer with seed {SEED_78_FD}")
+print(f"Next, let's analyze the results with a seed of {SEED_78_FD}, where EulerFD identified 78 FDs.")
+print(f"EulerFD missed {len(result_exact - result78)} FDs.")
+print(f"EulerFD found {len(result78 - result_exact)} false FDs.")
+print(f"Therefore, with the seed {SEED_78_FD}, EulerFD obtained the exact result.")
 
diff --git a/examples/basic/mining_fd_approximate.py b/examples/basic/mining_fd_approximate.py
@@ -2,20 +2,28 @@
 
 print('''
 =======================================================
-This example show how Desbardante can seaching
-functional dependencies (FD) approximately. There are 2
-algorithms: EulerFD and AidFD, which can approximately
-search FD, they may not find some FD or get false FD
-unlike exact FD algorithms, but works by several
-times faster.
-
-For more information consider:
+This example demonstrates how Desbordante can apply
+approximate functional dependency (FD) discovery
+methods.
+It utilizes two algorithms, EulerFD and AID-FD, which
+offer significant speed advantages over exact
+FD discovery methods. While these algorithms may not
+identify all true FDs or might occasionally yield
+false positives, they achieve substantially faster
+processing times.
+
+For more in-depth information, please refer
+to the following publications:
 1) EulerFD: An Efficient Double-Cycle Approximation
    of Functional Dependencies by
    Qiongqiong Lin, Yunfan Gu, Jingyan Sa et al.
-2) TODO: AidFD article
+2) Approximate Discovery of Functional Dependencies
+   for Large Datasets by Tobias Bleifuss,
+   Susanne Bulow, Johannes Frohnhofen et al.
 =======================================================\n
-Now, we are going to demonstrate how to use EulerFD and AidFD.''')
+We will now demonstrate how to invoke EulerFD and
+AID-FD in Desbordante.
+''')
 
 TABLE = 'examples/datasets/medical.csv'
 
@@ -31,7 +39,7 @@
 
 print('-------------------------------')
 
-print("AidFD: ")
+print("AID-FD: ")
 alg = desbordante.fd.algorithms.Aid()
 alg.load_data(table=(TABLE, ',', True))
 alg.execute()
@@ -41,4 +49,4 @@
 for fd in result_aid:
     print(fd)
 
-print("Also there is more complex example in advanced part with more work features of algorithms")
+print("In the advanced section, a more complex example will showcase additional features of the algorithms.")