Skip to content

Commit

Permalink
pst extract feature
Browse files Browse the repository at this point in the history
  • Loading branch information
scotthaleen committed Feb 4, 2015
1 parent 104770a commit e0be79a
Show file tree
Hide file tree
Showing 6 changed files with 235 additions and 11 deletions.
5 changes: 5 additions & 0 deletions bin/pstextract.sh
Original file line number Diff line number Diff line change
Expand Up @@ -17,8 +17,13 @@ fi

mkdir "${DIR}"

# Run readpst on the PST given as $2, writing into ${DIR}.
# "$2" is quoted so a PST path containing spaces stays a single argument.
echo "readpst -e -o ${DIR}/ -b -D $2"
readpst -e -o "${DIR}/" -b -D "$2"

# Build the list of extracted files that normalize.py reads from stdin.
# NOTE(review): emails.txt is created inside ${DIR} by the redirect before
# find runs, so the list also contains emails.txt itself.
find "${DIR}" -type f > "${DIR}/emails.txt"

# Start from a clean output directory for this email address ($1).
if [[ -d "demail/emails/${1}" ]]; then
    rm -rf "demail/emails/${1}"
fi

./pst/normalize.py "${1}" "demail/emails/${1}" --start 0 --limit 2000 < "${DIR}/emails.txt"
5 changes: 5 additions & 0 deletions demail/css/ingest-default.css
Original file line number Diff line number Diff line change
Expand Up @@ -74,4 +74,9 @@ span.bold {
border-bottom: 1px solid #ccc;
font-family: "Lucida Console", Monaco, monospace;
font-size: 12px;
}

/* File inputs nested in a label.upload are pinned 1000px above the top of
   the viewport, so the native control is not visible on the page. */
label.upload input[type="file"] {
position: fixed;
top: -1000px;
}
18 changes: 18 additions & 0 deletions demail/ingest.html
Original file line number Diff line number Diff line change
Expand Up @@ -74,6 +74,24 @@
<button id="btn-download" class="btn btn-md btn-primary">Download</button>
</p>
</form>
<!-- PST ingest form: the email address the PST belongs to, a dropdown of
     available PST files (#pst-options, filled in by ingest-tool.js) and the
     button that starts the extraction. -->
<form class="form-ingest" role="form">
  <p>
    <span>Or Select a PST to ingest</span><br/>
    <span>(copy a pst file to </span>
    <span class="bold">/vagrant/pst</span><span> directory on the VM)</span>
  </p>
  <p>
    <input id="txt_pstemail"
           class="form-control" placeholder="PST Email Address" required >
  </p>
  <p>
    <select class="form-control" id="pst-options"></select>
  </p>
  <p>
    <button id="btn-pst-extract" class="btn btn-md btn-primary">Extract</button>
  </p>
</form>
</div>

<div class="col-md-4">
Expand Down
104 changes: 101 additions & 3 deletions demail/js/ingest-tool.js
Original file line number Diff line number Diff line change
Expand Up @@ -5,7 +5,7 @@ var validateEmail = function(email) {
};

var FORM = (function(){
var items = ['txt_email', 'txt_pass', 'btn-download', 'btn-ingest', 'ingest-options'];
var items = ['txt_email', 'txt_pass', 'btn-download', 'btn-ingest', 'ingest-options', 'txt_pstemail', 'pst-options', 'btn-pst-extract'];
var enable = _.partial(_.each, items, function(item){
$('#' + item).removeAttr('disabled');
});
Expand Down Expand Up @@ -44,7 +44,21 @@ var refresh_ingest_options = function(){
$('#ingest-options').append($('<option>').html(item));
});
});
}
};

/**
 * Reload the PST dropdown (#pst-options) from the server.
 *
 * Sends a GET to 'pst/list' and expects a JSON response of the form
 * { items: [...] }; every item becomes one <option> in the dropdown.
 */
var refresh_pst_options = function(){
  $.ajax({
    'url' : 'pst/list',
    'type': 'GET',
    'dataType' : 'json'
  }).then(function(resp){
    var select = $('#pst-options');

    // Clear the PST dropdown itself (not #ingest-options) so a refresh
    // replaces the old entries instead of appending duplicates.
    select.empty();

    _.each(resp.items, function(item){
      // .text() inserts the file name literally; a name containing markup
      // characters must not be parsed as HTML.
      select.append($('<option>').text(item));
    });
  });
};

var parseStatus = function(sz){
var parts = sz.trim().split("\n");
Expand Down Expand Up @@ -78,6 +92,36 @@ var pollForStatus = function(url, statuses, callback){
};
};

/**
 * Build a poller for the extraction job whose log is named `logname`.
 *
 * The returned function, when called, fetches 'ingest/ingeststate/<logname>'
 * and runs the response's `log` field through parseStatus. If the resulting
 * status equals (case-insensitively) one of `statuses`, `callback(status)`
 * is invoked; otherwise the state is fetched again after 15 seconds.
 * After every state response the job log is fetched from
 * 'ingest/ingestlog/<logname>' and its last 15 lines are passed to logMsgs.
 */
var pollForStatusExtract = function(logname, statuses, callback){
  var state_url = 'ingest/ingeststate/' + logname;
  var log_url = 'ingest/ingestlog/' + logname;

  // True when `status` matches one of the requested statuses.
  var matchesAny = function(status){
    return _.some(statuses, function(candidate){
      return candidate.toLowerCase() == status.toLowerCase();
    });
  };

  // Fetch the job log and display its tail alongside the current status.
  var showLogTail = function(status){
    $.ajax({ url : log_url , dataType: 'json'}).then(function(logResp){
      logMsgs("Extracting", status, _.last(logResp.log.split("\n"), 15));
    });
  };

  return function(){
    var poll = function(){
      var onState = function(stateResp){
        var status = parseStatus(stateResp.log);
        console.log(status);

        if (matchesAny(status)){
          callback(status);
        } else {
          _.delay(poll, 15 * 1000);
        }

        showLogTail(status);
      };

      $.ajax({ url : state_url, dataType: 'json'}).then(onState);
    };

    poll();
  };
};


var pollForStatusIngest = function(logname, statuses, callback){
var url = 'ingest/ingeststate/' + logname;
var log_url = 'ingest/ingestlog/' + logname;
Expand Down Expand Up @@ -107,6 +151,8 @@ var pollForStatusIngest = function(logname, statuses, callback){
};
};



// Make the #div-ingest-complete element visible.
var ingestComplete = function(){
  var completeBox = $("#div-ingest-complete");
  completeBox.show();
};
Expand Down Expand Up @@ -165,6 +211,38 @@ var run_ingest = function(str){

};


/**
 * Ask the server to extract `pst_file` for `email` and track the job.
 *
 * POSTs { email, pst } as JSON to 'pst/extract' and disables the form while
 * the job runs. The response's `log` field names the job log, which is
 * polled until the status is 'Complete' or 'Error'; the form is then
 * re-enabled, the ingest options are refreshed and the status is alerted.
 * If the POST itself fails, an error is alerted and the form is re-enabled.
 */
var extract_pst = function(email, pst_file) {

  var onFailure = function(){
    console.log(arguments);
    alert('error');
    FORM.enable();
  };

  var onFinished = function(status){
    FORM.enable();
    refresh_ingest_options();
    alert(status);
  };

  var onStarted = function(resp){
    console.log(arguments);
    var startPolling = pollForStatusExtract(resp.log, ['Complete', 'Error'], onFinished);
    startPolling();
  };

  var payload = JSON.stringify({ 'email' : email, 'pst': pst_file });

  var request = $.ajax({
    'url' : 'pst/extract',
    'type': 'POST',
    'dataType' : 'json',
    'data': payload,
    'contentType':"application/json; charset=utf-8"
  });

  FORM.disable();

  request.then(onStarted, onFailure);

};

var click_handler_download = function(evt){
evt.preventDefault();
var user = $('#txt_email').val();
Expand Down Expand Up @@ -215,11 +293,31 @@ var click_handler_ingest = function(evt){
return false;
};

/**
 * Click handler for the PST "Extract" button.
 *
 * Suppresses the default button action, validates the address typed into
 * #txt_pstemail (non-empty and accepted by validateEmail) and, when it is
 * valid, starts extraction of the PST currently selected in #pst-options.
 * Validation problems are reported with alert() and nothing is started.
 */
var click_handler_pst_extract = function(evt){
  evt.preventDefault();

  var email = $('#txt_pstemail').val().trim();
  var problem = null;

  if (email.length == 0){
    problem = 'please enter the email associated with this pst';
  } else if (!validateEmail(email)){
    problem = email + " is not a valid email address. \nPlease enter a valid email \nexample: [email protected]";
  }

  if (problem !== null){
    alert(problem);
    return;
  }

  extract_pst(email, $('#pst-options').val());
};


// Wire each button to its click handler.
$('#btn-download').on('click', click_handler_download);
$('#btn-ingest').on('click', click_handler_ingest);

$('#btn-pst-extract').on('click', click_handler_pst_extract);


// init: populate both dropdowns as soon as the script loads

refresh_ingest_options();
refresh_pst_options();
93 changes: 93 additions & 0 deletions demail/pst.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,93 @@
from __future__ import with_statement

import threading
import subprocess

import tangelo
import cherrypy
import json
import os
import sys
import datetime

from newman.utils.file import rm, spit

# Filesystem locations used by this service.
# webroot is read from the CherryPy config; base_dir is its parent directory
# (used as the working directory of the extraction script) and work_dir is
# the sibling "work_dir" directory where the job log files are written.
webroot = cherrypy.config.get("webroot")
base_dir = os.path.abspath("{}/../".format(webroot))
work_dir = os.path.abspath("{}/../work_dir/".format(webroot))
# Directory whose files are offered for extraction.
pst_dir = "/vagrant/pst"


def fmtNow():
    """Return the current UTC time as a compact ``YYYYMMDDHHMMSS`` string."""
    now = datetime.datetime.utcnow()
    return now.strftime('%Y%m%d%H%M%S')

def extract_pst(*args, **kwargs):
    """Start a background PST extraction and return the name of its log.

    Keyword arguments (taken from the request):
        email: address the PST belongs to; passed to ``bin/pstextract.sh``
            as its first argument.
        pst: name of a file inside ``pst_dir``; its full path is passed to
            the script as the second argument.

    Returns ``{'log': logname}`` immediately. The script runs in a separate
    thread with ``base_dir`` as working directory; its stdout and stderr go
    to ``<work_dir>/<logname>.tee.log`` and ``.err.log``, and progress lines
    (``[Start]``, ``[Running]``, ``[Complete]``, ``[Error]``) are written to
    ``<work_dir>/<logname>.status.log``.

    Returns an HTTP 400 status when ``email`` or ``pst`` is missing or is
    not a single plain path component.
    """
    def is_plain_name(value):
        # One path component only: non-empty, no directory part, not "."/"..".
        try:
            return (bool(value)
                    and os.path.basename(value) == value
                    and value not in (".", ".."))
        except (TypeError, AttributeError):
            return False

    email = kwargs.get("email")
    pst = kwargs.get("pst")

    # Both values come from the request and end up in filesystem paths (the
    # script removes "demail/emails/<email>" recursively), so refuse
    # anything that could point outside the intended directories.
    if not is_plain_name(email) or not is_plain_name(pst):
        return tangelo.HTTPStatusCode(400, "invalid email or pst")

    pst_path = "{}/{}".format(pst_dir, pst)

    logname = "pst_{}".format(fmtNow())
    teefile = "{}/{}.tee.log".format(work_dir, logname)
    errfile = "{}/{}.err.log".format(work_dir, logname)
    logfile = "{}/{}.status.log".format(work_dir, logname)

    spit(logfile, "[Start] {}\n".format(email), True)

    def extract_thread():
        cmd = ["./bin/pstextract.sh", email, pst_path]
        cherrypy.log("running pst: {}".format(" ".join(cmd)))
        spit(logfile, "[Running] {} \n".format(" ".join(cmd)))
        try:
            with open(teefile, 'w') as t, open(errfile, 'w') as e:
                popen_kwargs = {'stdout': t, 'stderr': e, 'cwd': base_dir, 'bufsize': 1}
                subp = subprocess.Popen(cmd, **popen_kwargs)
                # Output goes straight to the files above; just wait for exit.
                subp.communicate()
                cherrypy.log("complete: {}".format(fmtNow()))
                rtn = subp.returncode
                if rtn != 0:
                    spit(logfile, "[Error] return with non-zero code: {} \n".format(rtn))
                else:
                    spit(logfile, "[Complete]")
        except Exception as ex:
            # Record the failure as text: sys.exc_info()[0] is the exception
            # class, which has no .replace(), so the old handler itself
            # raised and the [Error] status line was never written.
            error_text = "{}: {}".format(type(ex).__name__, ex).replace('\n', ' ')
            cherrypy.log(error_text, traceback=True)
            spit(logfile, "[Error] {}\n".format(error_text))

    thr = threading.Thread(target=extract_thread, args=())
    thr.start()
    tangelo.content_type("application/json")
    return {'log': logname}

def list_psts():
    """Return the files available in ``pst_dir``.

    Only the directory's own entries are listed; sub-directories are not
    descended into. The result is ``{'items': [filename, ...]}`` and the
    response content type is set to JSON. If ``pst_dir`` is missing or
    unreadable the list is empty.
    """
    path = "{}/".format(pst_dir)
    # os.walk yields nothing for a missing or unreadable directory, so give
    # next() a default instead of letting StopIteration escape. The builtin
    # next() also works where generators have no .next() method.
    _, _, filenames = next(os.walk(path), (path, [], []))
    tangelo.content_type("application/json")
    return {'items': filenames}

# Dispatch table for GET requests: action name -> handler.
get_actions = {
"list" : list_psts
}

# Dispatch table for POST requests: action name (the request's positional
# path arguments joined with '.') -> handler.
actions = {
"extract" : extract_pst
}

def unknown(*args):
    """Fallback handler for unrecognised actions: respond with HTTP 400."""
    bad_request = tangelo.HTTPStatusCode(400, "invalid service call")
    return bad_request

@tangelo.restful
def get(action, *args, **kwargs):
    """Handle GET requests by dispatching ``action`` through ``get_actions``.

    The remaining positional path arguments are forwarded to the handler;
    keyword (query) arguments are not. Unrecognised actions are answered by
    ``unknown``.
    """
    handler = get_actions.get(action, unknown)
    return handler(*args)

@tangelo.restful
def post(*args, **kwargs):
    """Handle POST requests by dispatching through ``actions``.

    The action name is the positional path arguments joined with '.'.
    When the request has a body it is parsed as a JSON object and its
    members are passed to the handler as keyword arguments; otherwise the
    form/query keyword arguments are passed. The positional arguments are
    forwarded to the handler in both cases.

    Responds with HTTP 400 for an unrecognised action, or for a body that
    is not valid JSON or not a JSON object.
    """
    def unknown(*args, **kwargs):
        return tangelo.HTTPStatusCode(400, "invalid service call")

    action = '.'.join(args)
    handler = actions.get(action, unknown)

    post_data = cherrypy.request.body.read()
    if not post_data:
        # form data post
        return handler(*args, **kwargs)

    # ajax body post
    try:
        payload = json.loads(post_data)
    except ValueError:
        # Malformed JSON is a client error, not a server failure.
        return tangelo.HTTPStatusCode(400, "invalid JSON body")
    if not isinstance(payload, dict):
        # Only a JSON object can be expanded into keyword arguments.
        return tangelo.HTTPStatusCode(400, "invalid JSON body")
    return handler(*args, **payload)
21 changes: 13 additions & 8 deletions pst/normalize.py
Original file line number Diff line number Diff line change
Expand Up @@ -52,11 +52,16 @@ def skip(iterable, at_start=0, at_end=0):
for i, line in enumerate(skip(args.infile, at_start=args.start)):
if ((not args.limit == 0) and (i >= args.limit)):
break;
fp = line.strip()
guid = email_extract.md5(fp)
category = email_extract.categoryList(fp)
buff = slurp(fp)
row = email_extract.extract(guid, buff, args.out_dir, category, args.target_email)
spit(outfile, row + "\n")

prn("completed line: {}".format(i + args.start))
try:
fp = line.strip()
guid = email_extract.md5(fp)
category = email_extract.categoryList(fp)
buff = slurp(fp)

row = email_extract.extract(guid, buff, args.out_dir, category, args.target_email)
spit(outfile, row + "\n")
except Exception as e:
print "exception line: {} | {} ".format(i, e.message)

if i % 100 == 0:
prn("completed line: {}".format(i + args.start))

0 comments on commit e0be79a

Please sign in to comment.