Skip to content

Commit

Permalink
new index tables for email to community
Browse files Browse the repository at this point in the history
  • Loading branch information
scotthaleen committed Dec 31, 2014
1 parent 0d9775d commit 87faab0
Show file tree
Hide file tree
Showing 4 changed files with 38 additions and 18 deletions.
8 changes: 8 additions & 0 deletions db_scripts/indices/index.sql
Original file line number Diff line number Diff line change
Expand Up @@ -93,6 +93,14 @@ create index idx_xref_email_topic_score_email on xref_email_topic_score(email_id
create index idx_xref_email_topic_score_email_category on xref_email_topic_score(category_id, email_id);
create index idx_xref_email_topic_score_category on xref_email_topic_score(category_id);

-- Indexes for xref_email_community, the email id to community id cross-reference table.
-- Each index is dropped before it is created, following the same drop/create pattern
-- used for the other tables in this script. drop_index_if_exists is a stored procedure
-- defined outside this excerpt (presumably a no-op when the index is absent -- verify).
call drop_index_if_exists('xref_email_community', 'idx_xref_email_community_email_community');
call drop_index_if_exists('xref_email_community', 'idx_xref_email_community_email');
call drop_index_if_exists('xref_email_community', 'idx_xref_email_community_community');

-- One composite index on (email_id, community_id) plus one single-column index per column.
-- NOTE(review): on MySQL the leftmost prefix of the composite index can already serve
-- lookups on email_id alone, so idx_xref_email_community_email may be redundant -- confirm
-- with EXPLAIN before removing it.
create index idx_xref_email_community_email_community on xref_email_community(email_id, community_id);
create index idx_xref_email_community_email on xref_email_community(email_id);
create index idx_xref_email_community_community on xref_email_community(community_id);

call drop_index_if_exists('search_results', 'idx_search_results_email_id');
create index idx_search_results_email_id on search_results(email_id);

Expand Down
6 changes: 6 additions & 0 deletions db_scripts/tables/tables.sql
Original file line number Diff line number Diff line change
Expand Up @@ -146,6 +146,12 @@ create table xref_email_topic_score (
score varchar(64) not null
) ENGINE=MyISAM;

-- Cross-reference table pairing an email id with a community id.
-- The table is dropped and recreated whenever this script runs, so existing rows are discarded.
-- It is populated by stmt_xref_email_community in ingest/src/post_process.py.
-- No primary key or unique constraint is declared here, and its indexes are created
-- separately in db_scripts/indices/index.sql.
drop table if exists xref_email_community;

create table xref_email_community (
email_id varchar(250) not null,
community_id varchar(250) not null
) ENGINE=MyISAM;

drop table if exists search_results;

Expand Down
27 changes: 10 additions & 17 deletions demail/search.py
Original file line number Diff line number Diff line change
Expand Up @@ -51,12 +51,10 @@
)

stmt_node_vals_filter_community = (
" select e.email_addr, e.community, e.community_id, e.group_id, e.total_received, e.total_sent, e.rank "
" select distinct e.email_addr, e.community, e.community_id, e.group_id, e.total_received, e.total_sent, e.rank "
" from email_addr e join xref_emailaddr_email xaddr on e.email_addr = xaddr.email_addr"
" join email eml on eml.id = xaddr.email_id "
" join xref_emailaddr_email xaddr2 on xaddr2.email_id = xaddr.email_id "
" join email_addr addr on addr.email_addr = xaddr2.email_addr "
" where addr.community_id = %s "
" join xref_email_community xeml on xeml.email_id = xaddr.email_id "
" where xeml.community_id = %s "
)

## Email Rows
Expand Down Expand Up @@ -94,9 +92,8 @@
)

stmt_find_emails_filter_community = (
" select id, dir, datetime, from_addr, tos, ccs, bccs, subject, attach, bodysize "
" from email e join xref_emailaddr_email addr on e.id = addr.email_id"
" join email_addr x on x.email_addr = addr.email_addr "
" select distinct id, dir, datetime, from_addr, tos, ccs, bccs, subject, attach, bodysize "
" from email e join xref_email_community x on e.id = x.email_id "
" where x.community_id = %s "
)

Expand Down Expand Up @@ -198,17 +195,13 @@
" select source, target, sum(weight)"
" from ("
" select x.`from` as source, x.recipient as target, count(1) as weight "
" from xref_recipients x join email e on x.email_id = e.id "
" join xref_emailaddr_email xaddr on e.id = xaddr.email_id "
" join email_addr addr on addr.email_addr = xaddr.email_addr "
" where addr.community_id = %s "
" group by `from`, recipient "
" from xref_recipients x join xref_email_community e on x.email_id = e.email_id "
" where e.community_id = %s "
" group by `from`, recipient "
" union all"
" select x.recipient as source, x.`from` as target, count(1) as weight "
" from xref_recipients x join email e on x.email_id = e.id "
" join xref_emailaddr_email xaddr on e.id = xaddr.email_id "
" join email_addr addr on addr.email_addr = xaddr.email_addr "
" where addr.community_id = %s "
" from xref_recipients x join xref_email_community e on x.email_id = e.email_id "
" where e.community_id = %s "
" group by x.`from`, x.recipient "
" ) as t "
" group by source, target"
Expand Down
15 changes: 14 additions & 1 deletion ingest/src/post_process.py
Original file line number Diff line number Diff line change
Expand Up @@ -37,12 +37,19 @@
" on x.entity_id = e.subject "
)

# Populates xref_email_community with one row per distinct (email id, community id) pair.
# Each email is linked through xref_emailaddr_email to its addresses, and every linked
# address contributes its email_addr.community_id.
# NOTE(review): this is a plain insert with no preceding delete, and the target table
# declares no unique constraint, so running it twice would presumably duplicate rows
# unless the table is recreated first -- confirm against the ingest flow.
stmt_xref_email_community = (
" insert into xref_email_community (email_id, community_id) "
" select distinct e.id, a.community_id "
" from email e join xref_emailaddr_email x on e.id = x.email_id "
" join email_addr a on x.email_addr = a.email_addr "
)

if __name__ == "__main__":

parser = argparse.ArgumentParser(description='Post Process')
args= parser.parse_args()

with newman_connector() as read_cnx, newman_connector() as write_cnx:
with newman_connector() as write_cnx:
print "populate email_addr"
with execute_nonquery(write_cnx.conn(), stmt_email_addr_insert) as qry:
pass
Expand All @@ -57,3 +64,9 @@
with execute_nonquery(write_cnx.conn(), stmt_xref_entity_email) as qry:
pass
write_cnx.commit()

print "populate xref_email_community"
with execute_nonquery(write_cnx.conn(), stmt_xref_email_community) as qry:
pass
write_cnx.commit()

0 comments on commit 87faab0

Please sign in to comment.