diff --git a/0.1/.buildinfo b/0.1/.buildinfo index 42e6e1fd..c06196d0 100644 --- a/0.1/.buildinfo +++ b/0.1/.buildinfo @@ -1,4 +1,4 @@ # Sphinx build info version 1 # This file hashes the configuration used when building these files. When it is not found, a full rebuild will be done. -config: 55bb307912a4f2d9d48195c7e170ecb8 +config: 8f283ab8d9420186222c16786ac862f9 tags: 645f666f9bcd5a90fca523b33c5a78b7 diff --git a/0.1/_images/sphx_glr_01_encodings_001.png b/0.1/_images/sphx_glr_01_encodings_001.png index 49ac3f42..41af42fb 100644 Binary files a/0.1/_images/sphx_glr_01_encodings_001.png and b/0.1/_images/sphx_glr_01_encodings_001.png differ diff --git a/0.1/_images/sphx_glr_01_encodings_thumb.png b/0.1/_images/sphx_glr_01_encodings_thumb.png index f290b54c..2f2503be 100644 Binary files a/0.1/_images/sphx_glr_01_encodings_thumb.png and b/0.1/_images/sphx_glr_01_encodings_thumb.png differ diff --git a/0.1/_images/sphx_glr_08_join_aggregation_003.png b/0.1/_images/sphx_glr_08_join_aggregation_003.png index 22f957b0..a6275541 100644 Binary files a/0.1/_images/sphx_glr_08_join_aggregation_003.png and b/0.1/_images/sphx_glr_08_join_aggregation_003.png differ diff --git a/0.1/_images/sphx_glr_09_interpolation_join_001.png b/0.1/_images/sphx_glr_09_interpolation_join_001.png index eac5513a..6f06c9e0 100644 Binary files a/0.1/_images/sphx_glr_09_interpolation_join_001.png and b/0.1/_images/sphx_glr_09_interpolation_join_001.png differ diff --git a/0.1/_images/sphx_glr_09_interpolation_join_002.png b/0.1/_images/sphx_glr_09_interpolation_join_002.png index ddf09d4d..079b2cd0 100644 Binary files a/0.1/_images/sphx_glr_09_interpolation_join_002.png and b/0.1/_images/sphx_glr_09_interpolation_join_002.png differ diff --git a/0.1/_images/sphx_glr_09_interpolation_join_003.png b/0.1/_images/sphx_glr_09_interpolation_join_003.png index a8dc7a84..c7346677 100644 Binary files a/0.1/_images/sphx_glr_09_interpolation_join_003.png and b/0.1/_images/sphx_glr_09_interpolation_join_003.png differ diff --git a/0.1/_images/sphx_glr_09_interpolation_join_thumb.png b/0.1/_images/sphx_glr_09_interpolation_join_thumb.png index 5c3ec005..b9e28afc 100644 Binary files a/0.1/_images/sphx_glr_09_interpolation_join_thumb.png and b/0.1/_images/sphx_glr_09_interpolation_join_thumb.png differ diff --git a/0.1/_sources/auto_examples/01_encodings.rst.txt b/0.1/_sources/auto_examples/01_encodings.rst.txt index 6a0c38f2..fd68e6f8 100644 --- a/0.1/_sources/auto_examples/01_encodings.rst.txt +++ b/0.1/_sources/auto_examples/01_encodings.rst.txt @@ -440,47 +440,42 @@ corresponding columns: .. code-block:: none - array(['accountability, accounts, community', - 'assessment, protective, treatment', - 'communications, communication, telecommunications', - 'station, state, estate', 'security, mccf, unit', - 'gaithersburg, clarksburg, the', 'traffic, safety, alcohol', - 'investigative, investigations, criminal', - 'training, recruit, director', 'inspections, inspection, special', - 'district, squad, 3rd', 'services, highway, service', - 'behavioral, health, school', 'silver, spring, ride', - 'nicholson, transit, transport', 'mangement, engineering, parking', - 'welfare, childhood, children', 'collection, recycling, solid', - 'technology, systems, telephone', - 'equipment, management, automotive', - 'facilities, maintenance, supports', - 'rockville, downtown, library', 'warehouse, liquor, stock', - 'development, delivery, montgomery', - 'eligibility, assistance, emergency', - 'programs, program, officers', 'planning, construction, building', - 'patrol, 5th, 4th', 'family, crimes, sexual', - 'toddlers, custody, members', 'coordinator, services, service', - 'operator, bus, operations', 'specialist, special, quality', - 'master, registered, water', 'manager, projects, project', - 'officer, office, police', 'firefighter, rescuer, rescue', - 'supervisor, supervisory, supply', 'assistance, income, client', - 'correctional, correction, corporal', - 'legislative, principal, executive', 'school, room, behavioral', - 'community, security, nurse', - 'communications, telecommunications, safety', - 'lieutenant, captain, chief', - 'information, technology, technologist', + array(['compliance, building, violence', 'gaithersburg, clarksburg, the', + 'station, state, estate', 'development, planning, accounting', + 'patrol, 4th, 5th', 'traffic, safety, alcohol', + 'management, equipment, budget', 'toddlers, custody, members', + 'services, highway, service', 'behavioral, health, school', + 'collection, inspections, operations', 'family, crimes, outreach', + 'welfare, childhood, child', 'security, mccf, unit', + 'supports, support, network', 'emergency, centers, center', + 'district, squad, urban', 'maintenance, facilities, recruit', + 'administration, battalion, admin', 'nicholson, transit, taxicab', + 'warehouse, delivery, cloverly', + 'communications, communication, education', 'spring, silver, king', + 'assessment, protective, projects', + 'technology, telephone, systems', 'rockville, twinbrook, downtown', + 'director, officers, officer', 'assignment, assistance, medical', + 'animal, virtual, regional', + 'investigative, investigations, explosive', + 'firefighter, rescuer, recruit', 'operator, bus, operations', + 'officer, office, security', 'government, employee, budget', + 'liquor, clerk, store', 'information, technology, renovation', + 'manager, engineer, iii', 'income, assistance, client', 'administrative, administration, administrator', - 'enforcement, inspector, abandoned', - 'crossing, purchasing, engineer', - 'warehouse, craftsworker, welfare', 'sergeant, cadet, emergency', - 'liquor, clerk, store', 'assistant, library, librarian', - 'recreation, renovation, paralegal', - 'permitting, planning, resources', - 'equipment, auditor, investment', 'sheriff, deputy, urban', - 'environmental, therapist, enviromental', - 'program, programs, property', 'technician, mechanic, hvac'], - dtype=object) + 'coordinator, coordinating, transit', + 'technician, mechanic, supply', 'accountant, attendant, attorney', + 'corporal, pfc, dietary', 'community, health, nurse', + 'school, room, behavioral', 'services, supervisor, service', + 'enforcement, permitting, inspector', 'lieutenant, captain, chief', + 'assistant, library, librarian', + 'communications, telecommunications, safety', + 'warehouse, welfare, caseworker', 'specialist, special, therapist', + 'crossing, purchasing, planning', 'candidate, sheriff, deputy', + 'legislative, principal, executive', + 'equipment, investment, investigator', + 'program, programs, property', + 'correctional, correction, regional', 'sergeant, police, cadet', + 'master, registered, meter'], dtype=object) @@ -562,7 +557,7 @@ Let's look at the cross-validated R2 score of our model: .. code-block:: none - R2 score: mean: 0.921; std: 0.015 + R2 score: mean: 0.923; std: 0.014 @@ -698,7 +693,7 @@ to plot the feature importances. .. rst-class:: sphx-glr-timing - **Total running time of the script:** (1 minutes 20.731 seconds) + **Total running time of the script:** (1 minutes 27.261 seconds) .. _sphx_glr_download_auto_examples_01_encodings.py: diff --git a/0.1/_sources/auto_examples/02_feature_interpretation_with_gapencoder.rst.txt b/0.1/_sources/auto_examples/02_feature_interpretation_with_gapencoder.rst.txt index f01d21a6..c64735ae 100644 --- a/0.1/_sources/auto_examples/02_feature_interpretation_with_gapencoder.rst.txt +++ b/0.1/_sources/auto_examples/02_feature_interpretation_with_gapencoder.rst.txt @@ -504,7 +504,7 @@ as a set of latent topics. .. rst-class:: sphx-glr-timing - **Total running time of the script:** (0 minutes 2.168 seconds) + **Total running time of the script:** (0 minutes 2.163 seconds) .. _sphx_glr_download_auto_examples_02_feature_interpretation_with_gapencoder.py: diff --git a/0.1/_sources/auto_examples/03_datetime_encoder.rst.txt b/0.1/_sources/auto_examples/03_datetime_encoder.rst.txt index 4e339727..057f4c78 100644 --- a/0.1/_sources/auto_examples/03_datetime_encoder.rst.txt +++ b/0.1/_sources/auto_examples/03_datetime_encoder.rst.txt @@ -610,7 +610,7 @@ and transforms datetime columns by default. .. rst-class:: sphx-glr-timing - **Total running time of the script:** (0 minutes 4.849 seconds) + **Total running time of the script:** (0 minutes 5.097 seconds) .. _sphx_glr_download_auto_examples_03_datetime_encoder.py: diff --git a/0.1/_sources/auto_examples/04_fuzzy_joining.rst.txt b/0.1/_sources/auto_examples/04_fuzzy_joining.rst.txt index c9b19e24..e8d5fde5 100644 --- a/0.1/_sources/auto_examples/04_fuzzy_joining.rst.txt +++ b/0.1/_sources/auto_examples/04_fuzzy_joining.rst.txt @@ -1711,7 +1711,7 @@ introduced into a grid search: .. rst-class:: sphx-glr-timing - **Total running time of the script:** (0 minutes 22.159 seconds) + **Total running time of the script:** (0 minutes 22.931 seconds) .. _sphx_glr_download_auto_examples_04_fuzzy_joining.py: diff --git a/0.1/_sources/auto_examples/05_deduplication.rst.txt b/0.1/_sources/auto_examples/05_deduplication.rst.txt index d0e94dec..7236f965 100644 --- a/0.1/_sources/auto_examples/05_deduplication.rst.txt +++ b/0.1/_sources/auto_examples/05_deduplication.rst.txt @@ -335,7 +335,7 @@ or |MinHash|. .. rst-class:: sphx-glr-timing - **Total running time of the script:** (0 minutes 5.151 seconds) + **Total running time of the script:** (0 minutes 6.062 seconds) .. _sphx_glr_download_auto_examples_05_deduplication.py: diff --git a/0.1/_sources/auto_examples/06_ken_embeddings.rst.txt b/0.1/_sources/auto_examples/06_ken_embeddings.rst.txt index b6413ed9..3d0cc9ae 100644 --- a/0.1/_sources/auto_examples/06_ken_embeddings.rst.txt +++ b/0.1/_sources/auto_examples/06_ken_embeddings.rst.txt @@ -305,7 +305,7 @@ We will start by checking out the available tables with .. code-block:: none - {'companies', 'games', 'movies', 'all_entities', 'schools', 'albums'} + {'companies', 'games', 'all_entities', 'schools', 'movies', 'albums'} @@ -840,7 +840,7 @@ It helped significantly improve the prediction score. .. rst-class:: sphx-glr-timing - **Total running time of the script:** (11 minutes 21.562 seconds) + **Total running time of the script:** (13 minutes 4.578 seconds) .. _sphx_glr_download_auto_examples_06_ken_embeddings.py: diff --git a/0.1/_sources/auto_examples/07_multiple_key_join.rst.txt b/0.1/_sources/auto_examples/07_multiple_key_join.rst.txt index 12e257f5..835492de 100644 --- a/0.1/_sources/auto_examples/07_multiple_key_join.rst.txt +++ b/0.1/_sources/auto_examples/07_multiple_key_join.rst.txt @@ -1226,7 +1226,7 @@ The results: /home/circleci/project/miniconda/envs/testenv/lib/python3.10/site-packages/sklearn/preprocessing/_encoders.py:228: UserWarning: Found unknown categories in columns [1] during transform. These unknown categories will be encoded as all zeros warnings.warn( - 0.5913999999999999 + 0.58445 @@ -1244,7 +1244,7 @@ Our final cross-validated accuracy score is 0.58. .. rst-class:: sphx-glr-timing - **Total running time of the script:** (11 minutes 23.106 seconds) + **Total running time of the script:** (13 minutes 12.928 seconds) .. _sphx_glr_download_auto_examples_07_multiple_key_join.py: diff --git a/0.1/_sources/auto_examples/08_join_aggregation.rst.txt b/0.1/_sources/auto_examples/08_join_aggregation.rst.txt index b8c01447..c9d19a99 100644 --- a/0.1/_sources/auto_examples/08_join_aggregation.rst.txt +++ b/0.1/_sources/auto_examples/08_join_aggregation.rst.txt @@ -844,75 +844,75 @@ operation maximizing our validation score.
tv.named_transformers_["high_cardinality"].get_feature_names_out()
array(['accountability, accounts, community',
- 'assessment, protective, treatment',
- 'communications, communication, telecommunications',
- 'station, state, estate', 'security, mccf, unit',
- 'gaithersburg, clarksburg, the', 'traffic, safety, alcohol',
- 'investigative, investigations, criminal',
- 'training, recruit, director', 'inspections, inspection, special',
- 'district, squad, 3rd', 'services, highway, service',
- 'behavioral, health, school', 'silver, spring, ride',
- 'nicholson, transit, transport', 'mangement, engineering, parking',
- 'welfare, childhood, children', 'collection, recycling, solid',
- 'technology, systems, telephone',
- 'equipment, management, automotive',
- 'facilities, maintenance, supports',
- 'rockville, downtown, library', 'warehouse, liquor, stock',
- 'development, delivery, montgomery',
- 'eligibility, assistance, emergency',
- 'programs, program, officers', 'planning, construction, building',
- 'patrol, 5th, 4th', 'family, crimes, sexual',
- 'toddlers, custody, members', 'coordinator, services, service',
- 'operator, bus, operations', 'specialist, special, quality',
- 'master, registered, water', 'manager, projects, project',
- 'officer, office, police', 'firefighter, rescuer, rescue',
- 'supervisor, supervisory, supply', 'assistance, income, client',
- 'correctional, correction, corporal',
- 'legislative, principal, executive', 'school, room, behavioral',
- 'community, security, nurse',
- 'communications, telecommunications, safety',
- 'lieutenant, captain, chief',
- 'information, technology, technologist',
+array(['compliance, building, violence', 'gaithersburg, clarksburg, the',
+ 'station, state, estate', 'development, planning, accounting',
+ 'patrol, 4th, 5th', 'traffic, safety, alcohol',
+ 'management, equipment, budget', 'toddlers, custody, members',
+ 'services, highway, service', 'behavioral, health, school',
+ 'collection, inspections, operations', 'family, crimes, outreach',
+ 'welfare, childhood, child', 'security, mccf, unit',
+ 'supports, support, network', 'emergency, centers, center',
+ 'district, squad, urban', 'maintenance, facilities, recruit',
+ 'administration, battalion, admin', 'nicholson, transit, taxicab',
+ 'warehouse, delivery, cloverly',
+ 'communications, communication, education', 'spring, silver, king',
+ 'assessment, protective, projects',
+ 'technology, telephone, systems', 'rockville, twinbrook, downtown',
+ 'director, officers, officer', 'assignment, assistance, medical',
+ 'animal, virtual, regional',
+ 'investigative, investigations, explosive',
+ 'firefighter, rescuer, recruit', 'operator, bus, operations',
+ 'officer, office, security', 'government, employee, budget',
+ 'liquor, clerk, store', 'information, technology, renovation',
+ 'manager, engineer, iii', 'income, assistance, client',
'administrative, administration, administrator',
- 'enforcement, inspector, abandoned',
- 'crossing, purchasing, engineer',
- 'warehouse, craftsworker, welfare', 'sergeant, cadet, emergency',
- 'liquor, clerk, store', 'assistant, library, librarian',
- 'recreation, renovation, paralegal',
- 'permitting, planning, resources',
- 'equipment, auditor, investment', 'sheriff, deputy, urban',
- 'environmental, therapist, enviromental',
- 'program, programs, property', 'technician, mechanic, hvac'],
- dtype=object)
+ 'coordinator, coordinating, transit',
+ 'technician, mechanic, supply', 'accountant, attendant, attorney',
+ 'corporal, pfc, dietary', 'community, health, nurse',
+ 'school, room, behavioral', 'services, supervisor, service',
+ 'enforcement, permitting, inspector', 'lieutenant, captain, chief',
+ 'assistant, library, librarian',
+ 'communications, telecommunications, safety',
+ 'warehouse, welfare, caseworker', 'specialist, special, therapist',
+ 'crossing, purchasing, planning', 'candidate, sheriff, deputy',
+ 'legislative, principal, executive',
+ 'equipment, investment, investigator',
+ 'program, programs, property',
+ 'correctional, correction, regional', 'sergeant, police, cadet',
+ 'master, registered, meter'], dtype=object)
R2 score: mean: 0.921; std: 0.015
+R2 score: mean: 0.923; std: 0.014
The simple pipeline applied on this complex dataset gave us very good results.
@@ -939,7 +934,7 @@ ConclusionTotal running time of the script: (1 minutes 20.731 seconds)
+Total running time of the script: (1 minutes 27.261 seconds)