diff --git a/0.1/.buildinfo b/0.1/.buildinfo index 44587ffe..567ac8bc 100644 --- a/0.1/.buildinfo +++ b/0.1/.buildinfo @@ -1,4 +1,4 @@ # Sphinx build info version 1 # This file hashes the configuration used when building these files. When it is not found, a full rebuild will be done. -config: 4e757390fbfd429264474b4c216ae132 +config: e25750dadb21b97663017b304cf4de14 tags: 645f666f9bcd5a90fca523b33c5a78b7 diff --git a/0.1/_images/sphx_glr_01_encodings_001.png b/0.1/_images/sphx_glr_01_encodings_001.png index 267dfe0b..22ba0cf9 100644 Binary files a/0.1/_images/sphx_glr_01_encodings_001.png and b/0.1/_images/sphx_glr_01_encodings_001.png differ diff --git a/0.1/_images/sphx_glr_01_encodings_thumb.png b/0.1/_images/sphx_glr_01_encodings_thumb.png index 8dc3dd58..f0fff56d 100644 Binary files a/0.1/_images/sphx_glr_01_encodings_thumb.png and b/0.1/_images/sphx_glr_01_encodings_thumb.png differ diff --git a/0.1/_images/sphx_glr_08_join_aggregation_003.png b/0.1/_images/sphx_glr_08_join_aggregation_003.png index e01da23c..fc80fd46 100644 Binary files a/0.1/_images/sphx_glr_08_join_aggregation_003.png and b/0.1/_images/sphx_glr_08_join_aggregation_003.png differ diff --git a/0.1/_images/sphx_glr_09_interpolation_join_001.png b/0.1/_images/sphx_glr_09_interpolation_join_001.png index ce2ea4e5..403ab7ad 100644 Binary files a/0.1/_images/sphx_glr_09_interpolation_join_001.png and b/0.1/_images/sphx_glr_09_interpolation_join_001.png differ diff --git a/0.1/_images/sphx_glr_09_interpolation_join_002.png b/0.1/_images/sphx_glr_09_interpolation_join_002.png index b54ceb67..797dccd1 100644 Binary files a/0.1/_images/sphx_glr_09_interpolation_join_002.png and b/0.1/_images/sphx_glr_09_interpolation_join_002.png differ diff --git a/0.1/_images/sphx_glr_09_interpolation_join_003.png b/0.1/_images/sphx_glr_09_interpolation_join_003.png index ee35c411..cd124a19 100644 Binary files a/0.1/_images/sphx_glr_09_interpolation_join_003.png and b/0.1/_images/sphx_glr_09_interpolation_join_003.png differ diff --git a/0.1/_images/sphx_glr_09_interpolation_join_thumb.png b/0.1/_images/sphx_glr_09_interpolation_join_thumb.png index 65994100..c6c7be91 100644 Binary files a/0.1/_images/sphx_glr_09_interpolation_join_thumb.png and b/0.1/_images/sphx_glr_09_interpolation_join_thumb.png differ diff --git a/0.1/_sources/auto_examples/01_encodings.rst.txt b/0.1/_sources/auto_examples/01_encodings.rst.txt index 4305c556..f80b336f 100644 --- a/0.1/_sources/auto_examples/01_encodings.rst.txt +++ b/0.1/_sources/auto_examples/01_encodings.rst.txt @@ -440,41 +440,45 @@ corresponding columns: .. code-block:: none - array(['maintenance, facilities, finance', 'station, state, estate', - 'behavioral, health, school', 'gaithersburg, the, clarksburg', - 'procurement, protective, adult', 'warehouse, delivery, liquor', - 'patrol, 5th, 4th', 'supports, support, network', - 'traffic, safety, alcohol', 'spring, silver, monitoring', - 'family, pedophile, crimes', 'rockville, twinbrook, downtown', - 'administration, battalion, registration', - 'highway, welfare, child', 'technology, inmate, systems', - 'management, equipment, budget', - 'communications, communication, division', - 'nicholson, transit, taxicab', - 'investigative, investigations, criminal', 'security, mc311, mccf', - 'custody, mcdc, quality', 'inspections, inspection, collection', - 'eligibility, assistance, disability', 'services, service, animal', - 'programs, projects, program', 'accounts, receivable, members', - 'planning, training, recruit', 'district, squad, 3rd', - 'emergency, commuter, duplicating', 'director, kingsview, officer', - 'firefighter, recruit, rescuer', - 'paralegal, psychiatrist, employee', - 'income, assistance, compliance', 'librarian, associate, library', - 'candidate, police, sergeant', 'manager, projects, project', + array(['station, state, estate', 'district, patrol, 3rd', + 'silver, spring, urban', 'welfare, children, childhood', + 'services, highway, service', 'traffic, safety, alcohol', + 'gaithersburg, clarksburg, the', 'supports, downtown, support', + 'twinbrook, warehouse, rockville', + 'investigative, criminal, investigations', + 'equipment, automotive, fleet', 'assessment, ombudsman, adult', + 'programs, program, commercial', 'development, delivery, cloverly', + 'nicholson, transit, taxicab', 'abandoned, division, employee', + 'behavioral, health, school', 'maintenance, facilities, finance', + 'technology, inmate, systems', + 'administrative, administration, battalion', + 'toddlers, custody, members', 'building, structural, construction', + 'eligibility, assistance, medical', + 'communications, communication, immunization', + 'training, director, recruit', 'mangement, management, engagement', + 'collection, operations, special', + 'regulatory, environmental, centers', 'assignment, squad, team', + 'security, mc311, mccf', 'representative, legislative, customer', + 'manager, projects, project', 'accountant, assistant, library', + 'candidate, officer, office', 'operator, bus, operations', + 'school, health, room', 'lieutenant, captain, chief', + 'firefighter, rescuer, recruit', 'iii, police, of', + 'program, programs, graphic', + 'information, technology, technologist', + 'sergeant, attendant, police', + 'correctional, correction, corporal', + 'crossing, purchasing, engineer', 'community, nurse, unit', + 'coordinator, services, service', 'assistance, income, client', 'enforcement, permitting, inspector', - 'program, programs, resource', 'operator, bus, operations', - 'administrative, principal, executive', 'captain, rescue, chief', - 'technician, mechanic, supply', 'school, room, behavioral', + 'technician, mechanic, supply', + 'administrative, principal, executive', + 'warehouse, welfare, caseworker', 'transit, public, telephone', + 'sheriff, deputy, therapist', 'specialist, recreation, special', + 'supervisor, supervisory, records', 'master, meter, registered', 'communications, telecommunications, safety', - 'community, health, nurse', 'correctional, correction, corporal', - 'liquor, clerk, store', 'services, president, resident', - 'specialist, special, quality', 'coordinator, coordinating, depot', - 'officer, office, iii', 'master, registered, meter', - 'craftsworker, supervisor, advisor', 'sheriff, deputy, autobody', - 'information, recreation, technology', - 'warehouse, welfare, caseworker', 'crossing, purchasing, engineer', - 'lieutenant, shift, records', 'accountant, assistant, county', - 'equipment, investigator, apprentice'], dtype=object) + 'equipment, investment, investigator', + 'environmental, budget, analyst', 'liquor, clerk, store'], + dtype=object) @@ -556,7 +560,7 @@ Let's look at the cross-validated R2 score of our model: .. code-block:: none - R2 score: mean: 0.922; std: 0.010 + R2 score: mean: 0.921; std: 0.015 @@ -692,7 +696,7 @@ to plot the feature importances. .. rst-class:: sphx-glr-timing - **Total running time of the script:** (1 minutes 12.266 seconds) + **Total running time of the script:** (1 minutes 15.855 seconds) .. _sphx_glr_download_auto_examples_01_encodings.py: diff --git a/0.1/_sources/auto_examples/02_feature_interpretation_with_gapencoder.rst.txt b/0.1/_sources/auto_examples/02_feature_interpretation_with_gapencoder.rst.txt index 4a1f733c..80f9d55e 100644 --- a/0.1/_sources/auto_examples/02_feature_interpretation_with_gapencoder.rst.txt +++ b/0.1/_sources/auto_examples/02_feature_interpretation_with_gapencoder.rst.txt @@ -504,7 +504,7 @@ as a set of latent topics. .. rst-class:: sphx-glr-timing - **Total running time of the script:** (0 minutes 1.752 seconds) + **Total running time of the script:** (0 minutes 1.841 seconds) .. _sphx_glr_download_auto_examples_02_feature_interpretation_with_gapencoder.py: diff --git a/0.1/_sources/auto_examples/03_datetime_encoder.rst.txt b/0.1/_sources/auto_examples/03_datetime_encoder.rst.txt index 7f8acb21..9955839f 100644 --- a/0.1/_sources/auto_examples/03_datetime_encoder.rst.txt +++ b/0.1/_sources/auto_examples/03_datetime_encoder.rst.txt @@ -610,7 +610,7 @@ and transforms datetime columns by default. .. rst-class:: sphx-glr-timing - **Total running time of the script:** (0 minutes 4.439 seconds) + **Total running time of the script:** (0 minutes 4.448 seconds) .. _sphx_glr_download_auto_examples_03_datetime_encoder.py: diff --git a/0.1/_sources/auto_examples/04_fuzzy_joining.rst.txt b/0.1/_sources/auto_examples/04_fuzzy_joining.rst.txt index f36167e1..0282b031 100644 --- a/0.1/_sources/auto_examples/04_fuzzy_joining.rst.txt +++ b/0.1/_sources/auto_examples/04_fuzzy_joining.rst.txt @@ -1711,7 +1711,7 @@ introduced into a grid search: .. rst-class:: sphx-glr-timing - **Total running time of the script:** (0 minutes 22.247 seconds) + **Total running time of the script:** (0 minutes 20.264 seconds) .. _sphx_glr_download_auto_examples_04_fuzzy_joining.py: diff --git a/0.1/_sources/auto_examples/05_deduplication.rst.txt b/0.1/_sources/auto_examples/05_deduplication.rst.txt index d18f9b90..ff1a28c0 100644 --- a/0.1/_sources/auto_examples/05_deduplication.rst.txt +++ b/0.1/_sources/auto_examples/05_deduplication.rst.txt @@ -335,7 +335,7 @@ or |MinHash|. .. rst-class:: sphx-glr-timing - **Total running time of the script:** (0 minutes 4.893 seconds) + **Total running time of the script:** (0 minutes 5.130 seconds) .. _sphx_glr_download_auto_examples_05_deduplication.py: diff --git a/0.1/_sources/auto_examples/06_ken_embeddings.rst.txt b/0.1/_sources/auto_examples/06_ken_embeddings.rst.txt index 168425da..4d7e6424 100644 --- a/0.1/_sources/auto_examples/06_ken_embeddings.rst.txt +++ b/0.1/_sources/auto_examples/06_ken_embeddings.rst.txt @@ -305,7 +305,7 @@ We will start by checking out the available tables with .. code-block:: none - {'all_entities', 'albums', 'games', 'companies', 'schools', 'movies'} + {'albums', 'companies', 'games', 'all_entities', 'movies', 'schools'} @@ -840,7 +840,7 @@ It helped significantly improve the prediction score. .. rst-class:: sphx-glr-timing - **Total running time of the script:** (11 minutes 7.052 seconds) + **Total running time of the script:** (10 minutes 42.099 seconds) .. _sphx_glr_download_auto_examples_06_ken_embeddings.py: diff --git a/0.1/_sources/auto_examples/07_multiple_key_join.rst.txt b/0.1/_sources/auto_examples/07_multiple_key_join.rst.txt index 0699c4e3..31a6fd57 100644 --- a/0.1/_sources/auto_examples/07_multiple_key_join.rst.txt +++ b/0.1/_sources/auto_examples/07_multiple_key_join.rst.txt @@ -1226,7 +1226,7 @@ The results: /home/circleci/project/miniconda/envs/testenv/lib/python3.10/site-packages/sklearn/preprocessing/_encoders.py:228: UserWarning: Found unknown categories in columns [1] during transform. These unknown categories will be encoded as all zeros warnings.warn( - 0.5841000000000001 + 0.5906499999999999 @@ -1244,7 +1244,7 @@ Our final cross-validated accuracy score is 0.58. .. rst-class:: sphx-glr-timing - **Total running time of the script:** (11 minutes 58.676 seconds) + **Total running time of the script:** (10 minutes 58.884 seconds) .. _sphx_glr_download_auto_examples_07_multiple_key_join.py: diff --git a/0.1/_sources/auto_examples/08_join_aggregation.rst.txt b/0.1/_sources/auto_examples/08_join_aggregation.rst.txt index 5fb8d124..b30175dc 100644 --- a/0.1/_sources/auto_examples/08_join_aggregation.rst.txt +++ b/0.1/_sources/auto_examples/08_join_aggregation.rst.txt @@ -844,75 +844,75 @@ operation maximizing our validation score.
tv.named_transformers_["high_cardinality"].get_feature_names_out()
array(['maintenance, facilities, finance', 'station, state, estate',
- 'behavioral, health, school', 'gaithersburg, the, clarksburg',
- 'procurement, protective, adult', 'warehouse, delivery, liquor',
- 'patrol, 5th, 4th', 'supports, support, network',
- 'traffic, safety, alcohol', 'spring, silver, monitoring',
- 'family, pedophile, crimes', 'rockville, twinbrook, downtown',
- 'administration, battalion, registration',
- 'highway, welfare, child', 'technology, inmate, systems',
- 'management, equipment, budget',
- 'communications, communication, division',
- 'nicholson, transit, taxicab',
- 'investigative, investigations, criminal', 'security, mc311, mccf',
- 'custody, mcdc, quality', 'inspections, inspection, collection',
- 'eligibility, assistance, disability', 'services, service, animal',
- 'programs, projects, program', 'accounts, receivable, members',
- 'planning, training, recruit', 'district, squad, 3rd',
- 'emergency, commuter, duplicating', 'director, kingsview, officer',
- 'firefighter, recruit, rescuer',
- 'paralegal, psychiatrist, employee',
- 'income, assistance, compliance', 'librarian, associate, library',
- 'candidate, police, sergeant', 'manager, projects, project',
+array(['station, state, estate', 'district, patrol, 3rd',
+ 'silver, spring, urban', 'welfare, children, childhood',
+ 'services, highway, service', 'traffic, safety, alcohol',
+ 'gaithersburg, clarksburg, the', 'supports, downtown, support',
+ 'twinbrook, warehouse, rockville',
+ 'investigative, criminal, investigations',
+ 'equipment, automotive, fleet', 'assessment, ombudsman, adult',
+ 'programs, program, commercial', 'development, delivery, cloverly',
+ 'nicholson, transit, taxicab', 'abandoned, division, employee',
+ 'behavioral, health, school', 'maintenance, facilities, finance',
+ 'technology, inmate, systems',
+ 'administrative, administration, battalion',
+ 'toddlers, custody, members', 'building, structural, construction',
+ 'eligibility, assistance, medical',
+ 'communications, communication, immunization',
+ 'training, director, recruit', 'mangement, management, engagement',
+ 'collection, operations, special',
+ 'regulatory, environmental, centers', 'assignment, squad, team',
+ 'security, mc311, mccf', 'representative, legislative, customer',
+ 'manager, projects, project', 'accountant, assistant, library',
+ 'candidate, officer, office', 'operator, bus, operations',
+ 'school, health, room', 'lieutenant, captain, chief',
+ 'firefighter, rescuer, recruit', 'iii, police, of',
+ 'program, programs, graphic',
+ 'information, technology, technologist',
+ 'sergeant, attendant, police',
+ 'correctional, correction, corporal',
+ 'crossing, purchasing, engineer', 'community, nurse, unit',
+ 'coordinator, services, service', 'assistance, income, client',
'enforcement, permitting, inspector',
- 'program, programs, resource', 'operator, bus, operations',
- 'administrative, principal, executive', 'captain, rescue, chief',
- 'technician, mechanic, supply', 'school, room, behavioral',
+ 'technician, mechanic, supply',
+ 'administrative, principal, executive',
+ 'warehouse, welfare, caseworker', 'transit, public, telephone',
+ 'sheriff, deputy, therapist', 'specialist, recreation, special',
+ 'supervisor, supervisory, records', 'master, meter, registered',
'communications, telecommunications, safety',
- 'community, health, nurse', 'correctional, correction, corporal',
- 'liquor, clerk, store', 'services, president, resident',
- 'specialist, special, quality', 'coordinator, coordinating, depot',
- 'officer, office, iii', 'master, registered, meter',
- 'craftsworker, supervisor, advisor', 'sheriff, deputy, autobody',
- 'information, recreation, technology',
- 'warehouse, welfare, caseworker', 'crossing, purchasing, engineer',
- 'lieutenant, shift, records', 'accountant, assistant, county',
- 'equipment, investigator, apprentice'], dtype=object)
+ 'equipment, investment, investigator',
+ 'environmental, budget, analyst', 'liquor, clerk, store'],
+ dtype=object)
R2 score: mean: 0.922; std: 0.010
+R2 score: mean: 0.921; std: 0.015
The simple pipeline applied on this complex dataset gave us very good results.
@@ -933,7 +937,7 @@ ConclusionTotal running time of the script: (1 minutes 12.266 seconds)
+Total running time of the script: (1 minutes 15.855 seconds)