From 87faab03d6fe94a003274798eac25841f3320bf5 Mon Sep 17 00:00:00 2001 From: Scott Haleen Date: Wed, 31 Dec 2014 14:56:58 -0500 Subject: [PATCH] new index tables for email to community --- db_scripts/indices/index.sql | 8 ++++++++ db_scripts/tables/tables.sql | 6 ++++++ demail/search.py | 27 ++++++++++----------------- ingest/src/post_process.py | 15 ++++++++++++++- 4 files changed, 38 insertions(+), 18 deletions(-) diff --git a/db_scripts/indices/index.sql b/db_scripts/indices/index.sql index bb428762..c72e76aa 100644 --- a/db_scripts/indices/index.sql +++ b/db_scripts/indices/index.sql @@ -93,6 +93,14 @@ create index idx_xref_email_topic_score_email on xref_email_topic_score(email_id create index idx_xref_email_topic_score_email_category on xref_email_topic_score(category_id, email_id); create index idx_xref_email_topic_score_category on xref_email_topic_score(category_id); +call drop_index_if_exists('xref_email_community', 'idx_xref_email_community_email_community'); +call drop_index_if_exists('xref_email_community', 'idx_xref_email_community_email'); +call drop_index_if_exists('xref_email_community', 'idx_xref_email_community_community'); + +create index idx_xref_email_community_email_community on xref_email_community(email_id, community_id); +create index idx_xref_email_community_email on xref_email_community(email_id); +create index idx_xref_email_community_community on xref_email_community(community_id); + call drop_index_if_exists('search_results', 'idx_search_results_email_id'); create index idx_search_results_email_id on search_results(email_id); diff --git a/db_scripts/tables/tables.sql b/db_scripts/tables/tables.sql index b39bc326..92c1d000 100644 --- a/db_scripts/tables/tables.sql +++ b/db_scripts/tables/tables.sql @@ -146,6 +146,12 @@ create table xref_email_topic_score ( score varchar(64) not null ) ENGINE=MyISAM; +drop table if exists xref_email_community; + +create table xref_email_community ( + email_id varchar(250) not null, + community_id varchar(250) not null +) ENGINE=MyISAM; drop table if exists search_results; diff --git a/demail/search.py b/demail/search.py index 7cc545fc..35d24f21 100755 --- a/demail/search.py +++ b/demail/search.py @@ -51,12 +51,10 @@ ) stmt_node_vals_filter_community = ( - " select e.email_addr, e.community, e.community_id, e.group_id, e.total_received, e.total_sent, e.rank " + " select distinct e.email_addr, e.community, e.community_id, e.group_id, e.total_received, e.total_sent, e.rank " " from email_addr e join xref_emailaddr_email xaddr on e.email_addr = xaddr.email_addr" - " join email eml on eml.id = xaddr.email_id " - " join xref_emailaddr_email xaddr2 on xaddr2.email_id = xaddr.email_id " - " join email_addr addr on addr.email_addr = xaddr2.email_addr " - " where addr.community_id = %s " + " join xref_email_community xeml on xeml.email_id = xaddr.email_id " + " where xeml.community_id = %s " ) ## Email Rows @@ -94,9 +92,8 @@ ) stmt_find_emails_filter_community = ( - " select id, dir, datetime, from_addr, tos, ccs, bccs, subject, attach, bodysize " - " from email e join xref_emailaddr_email addr on e.id = addr.email_id" - " join email_addr x on x.email_addr = addr.email_addr " + " select distinct id, dir, datetime, from_addr, tos, ccs, bccs, subject, attach, bodysize " + " from email e join xref_email_community x on e.id = x.email_id " " where x.community_id = %s " ) @@ -198,17 +195,13 @@ " select source, target, sum(weight)" " from (" " select x.`from` as source, x.recipient as target, count(1) as weight " - " from xref_recipients x join email e on x.email_id = e.id " - " join xref_emailaddr_email xaddr on e.id = xaddr.email_id " - " join email_addr addr on addr.email_addr = xaddr.email_addr " - " where addr.community_id = %s " - " group by `from`, recipient " + " from xref_recipients x join xref_email_community e on x.email_id = e.email_id " + " where e.community_id = %s " + " group by `from`, recipient " " union all" " select x.recipient as source, x.`from` as target, count(1) as weight " - " from xref_recipients x join email e on x.email_id = e.id " - " join xref_emailaddr_email xaddr on e.id = xaddr.email_id " - " join email_addr addr on addr.email_addr = xaddr.email_addr " - " where addr.community_id = %s " + " from xref_recipients x join xref_email_community e on x.email_id = e.email_id " + " where e.community_id = %s " " group by x.`from`, x.recipient " " ) as t " " group by source, target" diff --git a/ingest/src/post_process.py b/ingest/src/post_process.py index f549d647..a93746e8 100755 --- a/ingest/src/post_process.py +++ b/ingest/src/post_process.py @@ -37,12 +37,19 @@ " on x.entity_id = e.subject " ) +stmt_xref_email_community = ( + " insert into xref_email_community (email_id, community_id) " + " select distinct e.id, a.community_id " + " from email e join xref_emailaddr_email x on e.id = x.email_id " + " join email_addr a on x.email_addr = a.email_addr " +) + if __name__ == "__main__": parser = argparse.ArgumentParser(description='Post Process') args= parser.parse_args() - with newman_connector() as read_cnx, newman_connector() as write_cnx: + with newman_connector() as write_cnx: print "populate email_addr" with execute_nonquery(write_cnx.conn(), stmt_email_addr_insert) as qry: pass @@ -57,3 +64,9 @@ with execute_nonquery(write_cnx.conn(), stmt_xref_entity_email) as qry: pass write_cnx.commit() + + print "populate xref_email_community" + with execute_nonquery(write_cnx.conn(), stmt_xref_email_community) as qry: + pass + write_cnx.commit() +