diff --git a/fileio.py b/fileio.py
new file mode 100644
index 0000000..7be1dd7
--- /dev/null
+++ b/fileio.py
@@ -0,0 +1,42 @@
+import glob
+import os
+import pickle
+
+
+def load_directory(path):
+    # All directory names in path are class names;
+    # all files inside a directory share that label.
+    class_paths = glob.glob(path + '/*')
+    class_names = [os.path.split(x)[-1] for x in class_paths]
+    file_names = {x: glob.glob(os.path.join(path, x, '*'))
+                  for x in class_names}
+    return class_names, file_names
+
+
+def pickle_results(path, file_name, data):
+    if not os.path.exists(path):
+        os.makedirs(path)
+    # pickle requires a binary-mode file handle
+    with open(os.path.join(path, file_name), 'wb') as f:
+        pickle.dump(data, f)
+    return True
+
+
+def file_dict_to_flat(file_dict):
+    # flatten {class: [files]} into a single list of files
+    file_list = []
+    for class_name in file_dict:
+        file_list.extend(file_dict[class_name])
+    return file_list
+
+
+def file_list_to_dict(file_list):
+    # rebuild {class: [files]} from paths of the form .../class_name/file
+    file_dict = {}
+    for f in file_list:
+        class_name = f.split('/')[-2]
+        if class_name in file_dict:
+            file_dict[class_name].append(f)
+        else:
+            file_dict[class_name] = [f]
+    return file_dict
diff --git a/hotdog.jpg b/hotdog.jpg
new file mode 100644
index 0000000..d94d7de
Binary files /dev/null and b/hotdog.jpg differ
diff --git a/inference.sh b/inference.sh
new file mode 100644
index 0000000..e53c214
--- /dev/null
+++ b/inference.sh
@@ -0,0 +1,8 @@
+# POST an image to the inference endpoint; $1 = image path, $2 = model name
+curl -X POST \
+  http://127.0.0.1:3031/inceptionV3/predict \
+  -F image=@$1 \
+  -F model_name=$2
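+
+# Example (illustrative), with the michaniki server running locally on
+# port 3031 and the bundled sample image:
+#   bash inference.sh hotdog.jpg base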
diff --git a/labeling.py b/labeling.py
new file mode 100644
index 0000000..e0a4fa9
--- /dev/null
+++ b/labeling.py
@@ -0,0 +1,126 @@
+import random
+
+import numpy as np
+from keras.preprocessing import image
+from keras.applications.inception_v3 import preprocess_input
+from sklearn.cluster import KMeans
+from sklearn.metrics import pairwise_distances_argmin_min
+
+from sherlockWrapper import query_inference_server
+
+
+def labeling_priority(data_unlabeled, trained_model, n, method='ed',
+                      data_labeled=None):
+    # pick the next n files to label: at random, or by greedy
+    # euclidean-distance ('ed') farthest-point sampling in feature space
+    if method == 'random':
+        return randomly_choose_n(data_unlabeled, n)
+    if method == 'ed':
+        unlabeled_features = feature_extraction(data_unlabeled, trained_model)
+        points = pick_points_faster(unlabeled_features, trained_model, n)
+        return [data_unlabeled[idx] for idx in points]
+
+
+def check_label_top_n(result, label, n=1):
+    # returns True if label is in the top n predictions, False otherwise
+    for r in result['data']['prediction'][:n]:
+        if label in r['label']:
+            return True
+    return False
+
+
+def choose_n_from_each_class(file_dict, n):
+    # pop up to n files from each class (mutates file_dict)
+    ret_dict = {}
+    for class_name in file_dict:
+        ret_dict[class_name] = []
+        for i in range(n):
+            if file_dict[class_name]:
+                ret_dict[class_name].append(file_dict[class_name].pop(0))
+    return ret_dict
+
+
+def run_inference_on_dict(file_dict, model_name='base',
+                          url='http://127.0.0.1:3031/inceptionV3/predict'):
+    results = {}
+    for class_name in file_dict:
+        results[class_name] = [query_inference_server(dp, model_name, url)
+                               for dp in file_dict[class_name]]
+    return results
+
+
+def choose_n(file_dict, n):
+    # selects the first n files from each class
+    ret_dict = {}
+    for class_name in file_dict:
+        if file_dict[class_name]:
+            ret_dict[class_name] = file_dict[class_name][:n]
+    return ret_dict
+
+
+def randomly_choose_n(file_list, n):
+    # randomly selects n files in total (fixed seed for reproducibility)
+    random.seed(90210)
+    return random.sample(file_list, n)
+
+
+def compute_accuracy(predictions, class_name):
+    # fraction of files whose top-1 prediction matches the true class
+    res = predictions[class_name]
+    correct = sum(r['data']['prediction'][0]['label'] == class_name
+                  for r in res)
+    return float(correct) / len(res)
+
+
+def feature_extraction(file_names, your_model):
+    # given a list of image paths and a model, returns one flattened
+    # feature vector (ndarray) per image
+    feature_list = []
+    for f in file_names:
+        img = image.load_img(f, target_size=(299, 299))
+        x = np.expand_dims(image.img_to_array(img), axis=0)
+        x = preprocess_input(x)
+        feature = your_model.predict(x)
+        feature_list.append(np.array(feature).flatten())
+    return feature_list
+
+
+def pick_points_faster(unlabeled_features, model, n, labeled_features=None):
+    # greedy farthest-point sampling: repeatedly select the unlabeled point
+    # whose distance to its nearest already-selected point is largest
+    labels = []
+    if not labeled_features:
+        labeled_features = [unlabeled_features[0]]
+        labels.append(0)
+        n = n - 1
+
+    _, values = pairwise_distances_argmin_min(unlabeled_features,
+                                              labeled_features)
+    for i in range(n):
+        max_of_min = np.argmax(values)
+        labeled_features.append(unlabeled_features[max_of_min])
+        labels.append(max_of_min)
+        _, values_new = pairwise_distances_argmin_min(unlabeled_features,
+                                                      [labeled_features[-1]])
+        # keep the running minimum distance to the selected set
+        for j in range(len(unlabeled_features)):
+            if values_new[j] < values[j]:
+                values[j] = values_new[j]
+
+    return labels
+
+
+def cluster_label(file_names, your_model, n):
+    # cluster image features into n groups with k-means
+    feature_list_np = np.array(feature_extraction(file_names, your_model))
+    return KMeans(n_clusters=n, random_state=0).fit(feature_list_np)
+
+
+# requirements: keras, tensorflow, scikit-learn, requests, boto3
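+
+# Illustrative usage sketch (assumes `file_list` holds image paths and
+# `iv3_topless` is the pooled, top-less InceptionV3 built in main.py):
+#
+#   features = feature_extraction(file_list, iv3_topless)
+#   picked = pick_points_faster(features, iv3_topless, 25)
+#   to_label = [file_list[i] for i in picked]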
diff --git a/main.py b/main.py
new file mode 100644
index 0000000..d732f96
--- /dev/null
+++ b/main.py
@@ -0,0 +1,111 @@
+import os
+
+import requests
+from keras.applications.inception_v3 import InceptionV3
+
+from fileio import *
+from sherlockWrapper import *
+from labeling import *
+
+
+def random_images_loop(model_name, file_loc, base_model='inceptionV3',
+                       N_initial=100, bucket='insightai2019',
+                       ip_addr='http://127.0.0.1:3031/'):
+    # main body for the randomly-sampled baseline
+    output_path = './results/' + model_name
+    transfer_url = ip_addr + base_model + '/transfer'
+    inference_url = ip_addr + base_model + '/predict'
+    status_url = ip_addr + 'tasks/info'
+
+    class_names, file_names = load_directory('./' + file_loc + '/train/')
+    validate_class_names, validate_file_names = load_directory(
+        './' + file_loc + '/val/')
+    test_class_names, test_file_names = load_directory('./' + file_loc + '/test/')
+
+    train_dict = choose_n(file_names, N_initial)
+
+    upload_to_S3(train_dict, os.path.join('models', model_name, 'train'))
+    upload_to_S3(validate_file_names, os.path.join('models', model_name, 'val'))
+
+    r = train_new_model(model_name, bucket_name=bucket,
+                        path_prefix='models', url=transfer_url)
+    wait_for_training(r)
+    rid = r['task_id']
+    response = requests.post(status_url, json={rid: rid})
+    r_acc = response.json()
+
+    test_random = run_inference_on_dict(test_file_names, model_name)
+    acc_random = [compute_accuracy(test_random, k) for k in test_random]
+
+    save_file_name = 'r{}.pickle'.format(0)
+    pickle_results(output_path, save_file_name, [r_acc, acc_random, test_random])
+    return r_acc, test_random
+
+
+def non_random_images_loop(model_name, file_loc, base_model='inceptionV3',
+                           N_initial=100, bucket='insightai2019',
+                           ip_addr='http://127.0.0.1:3031/'):
+    # main body for farthest-point ('ed') selection
+    output_path = './results/' + model_name
+    transfer_url = ip_addr + base_model + '/transfer'
+    inference_url = ip_addr + base_model + '/predict'
+    status_url = ip_addr + 'tasks/info'
+    retrain_url = ip_addr + 'inceptionV3/retrain'
+
+    # top-less InceptionV3 with global max pooling as the feature extractor
+    iv3_topless = InceptionV3(include_top=False, weights='imagenet',
+                              pooling='max', input_shape=(299, 299, 3))
+
+    class_names, file_names = load_directory('./' + file_loc + '/train/')
+    validate_class_names, validate_file_names = load_directory(
+        './' + file_loc + '/val/')
+    class_names, test_file_names = load_directory('./' + file_loc + '/test/')
+
+    file_list = []
+    file_labels = []
+    for k in file_names:
+        file_list.extend(file_names[k])
+        file_labels.extend([k] * len(file_names[k]))
+
+    unlabeled_features = feature_extraction(file_list, iv3_topless)
+    points = pick_points_faster(unlabeled_features, iv3_topless, N_initial)
+
+    upload_dict = {k: [] for k in class_names}
+    for idx in points:
+        upload_dict[file_labels[idx]].append(file_list[idx])
+    upload_to_S3(upload_dict, os.path.join('models', model_name, 'train'))
+    upload_to_S3(validate_file_names, os.path.join('models', model_name, 'val'))
+
+    r = train_new_model(model_name, bucket_name=bucket,
+                        path_prefix='models', url=transfer_url)
+    wait_for_training(r)
+    rid = r['task_id']
+    response = requests.post(status_url, json={rid: rid})
+    train_acc = response.json()  # 83.6 training, 81.6 validation
+
+    test_results = run_inference_on_dict(test_file_names, model_name)
+    test_acc = [compute_accuracy(test_results, k) for k in test_results]
+
+    return train_acc, test_acc
+
+
+def main(model_name, base_model='inceptionV3', N_initial=5,
+         iterations=1, labelsPerRound=5, bucket='insightai2019',
+         ip_addr='http://127.0.0.1:3031/'):
+    # settings below override the defaults for one specific experiment
+    model_name = 'HotWineBike1kRandom'
+    N_initial = 25
+    labelsPerRound = 25
+    output_path = './results/' + model_name
+    transfer_url = ip_addr + base_model + '/transfer'
+    inference_url = ip_addr + base_model + '/predict'
+    status_url = ip_addr + 'tasks/info'
+    retrain_url = ip_addr + 'inceptionV3/retrain'
+    iv3 = InceptionV3(weights='imagenet', input_shape=(299, 299, 3))
+    iv3_topless = InceptionV3(include_top=False, weights='imagenet',
+                              input_shape=(299, 299, 3))
+    # load the images - array of Images
+
+
+if __name__ == '__main__':
+    main('tomato_potato')
diff --git a/misc.py b/misc.py
new file mode 100644
index 0000000..5445877
--- /dev/null
+++ b/misc.py
@@ -0,0 +1,23 @@
+import glob
+import os
+import random
+import shutil
+
+
+def train_test_validate(path, model_path, n_test=50, n_val=50,
+                        names=['test', 'val', 'train']):
+    # shuffle one class directory and split it into test/val/train folders
+    class_name = os.path.split(path)[-1]
+    file_names = glob.glob(os.path.join(path, '*'))
+    destinations = [os.path.join(model_path, x, class_name) for x in names]
+    random.shuffle(file_names)
+    for d in destinations:
+        if not os.path.isdir(d):
+            os.makedirs(d)
+
+    splits = [file_names[:n_test],
+              file_names[n_test:n_test + n_val],
+              file_names[n_test + n_val:]]
+    for dest, chunk in zip(destinations, splits):
+        for f in chunk:
+            shutil.move(f, os.path.join(dest, os.path.split(f)[-1]))
diff --git a/sherlockWrapper.py b/sherlockWrapper.py
new file mode 100644
index 0000000..58ba29a
--- /dev/null
+++ b/sherlockWrapper.py
@@ -0,0 +1,75 @@
+import os
+import time
+
+import boto3
+import requests
+
+
+def query_inference_server(file_name, model_name='base',
+                           url='http://127.0.0.1:3031/inceptionV3/predict'):
+    form_data = {'model_name': model_name}
+    with open(file_name, 'rb') as f:
+        response = requests.post(url, files={'image': f}, data=form_data)
+    return response.json()
+
+
+def train_new_model(model_name, bucket_name='insightai2019',
+                    path_prefix='models',
+                    url='http://127.0.0.1:3031/inceptionV3/transfer'):
+    form_data = {
+        'train_bucket_name': bucket_name,
+        'train_bucket_prefix': os.path.join(path_prefix, model_name)
+    }
+    response = requests.post(url, data=form_data)
+    return response.json()
+
+
+def check_status(task_id, url='http://127.0.0.1:3031/tasks/info'):
+    response = requests.post(url, json={task_id: task_id})
+    return response.json()['Tasks Status'][0]['status'] == 'SUCCESS'
+
+
+def retrain_model(model_name, path, bucket_name='insightai2019',
+                  nb_epoch=3, batch_size=2,
+                  url='http://127.0.0.1:3031/inceptionV3/retrain'):
+    form_data = {
+        'nb_epoch': nb_epoch,
+        'batch_size': batch_size,
+        'train_bucket_name': bucket_name,
+        'train_bucket_prefix': os.path.join(path, model_name)
+    }
+    response = requests.post(url, data=form_data)
+    return response.json()
+
+
+def upload_to_S3(file_dict, key_path, bucket_name='insightai2019'):
+    # push all files in file_dict to S3 under key_path/<class>/<file>
+    s3 = boto3.client('s3')
+    for key in file_dict:
+        for datapoint in file_dict[key]:
+            fname = os.path.split(datapoint)[-1]
+            file_key = os.path.join(key_path, key, fname)
+            print(file_key)
+            s3.upload_file(datapoint, bucket_name, file_key)
+
+
+def wait_for_training(response, t=20, t_max=900,
+                      url='http://127.0.0.1:3031/tasks/info'):
+    # poll the task endpoint, backing off gently, until training succeeds
+    # or t_max seconds have elapsed
+    waited = 0
+    while not check_status(response['task_id'], url):
+        if waited >= t_max:
+            return 0
+        time.sleep(t)
+        waited += t
+        t += t / 10
+    return 1
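+
+# Illustrative end-to-end flow (assumes the michaniki server on port 3031,
+# the 'insightai2019' bucket, and a file_dict mapping class names to local
+# image paths; 'my_model' is a hypothetical model name):
+#
+#   upload_to_S3(file_dict, 'models/my_model/train')
+#   r = train_new_model('my_model')
+#   wait_for_training(r)
+#   print(query_inference_server('hotdog.jpg', 'my_model'))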
diff --git a/src/app/apis/InceptionV3/API_helpers.py b/src/app/apis/InceptionV3/API_helpers.py
index 2beef9b..410701b 100644
--- a/src/app/apis/InceptionV3/API_helpers.py
+++ b/src/app/apis/InceptionV3/API_helpers.py
@@ -41,7 +41,7 @@ def save_classes_label_dict(label_dict, file_path_name):
     with open(file_path_name, 'w') as fp:
         json.dump(label_dict, fp)
 
-    print "* Helper: Classes Label Json Saved"
+    print "*Helper: Classes Label Json Saved"
 
 def download_a_dir_from_s3(bucket_name, bucket_prefix, local_path):
     """
@@ -51,7 +51,7 @@ def download_a_dir_from_s3(bucket_name, bucket_prefix, local_path):
 
     Will not download if the local folder already exists
     """
-    print "* Helper: Loading Images from S3 {} {}".format(bucket_name,bucket_prefix)
+    print "*Helper: Loading Images from S3 {} {}".format(bucket_name, bucket_prefix)
 
     output_path = os.path.join(local_path, bucket_prefix)
 
     if not os.path.exists(os.path.join(output_path, 'train')):
@@ -69,12 +69,17 @@ def download_a_dir_from_s3(bucket_name, bucket_prefix, local_path):
             try:
                 os.makedirs(save_path)
             except OSError:
                 pass
-            mybucket.download_file(obj.key, os.path.join(save_path, filename))
-
+            # the S3 iterator yields keys for directories as well as files
+            try:
+                mybucket.download_file(obj.key,
+                                       os.path.join(save_path, filename))
+            except OSError:
+                pass
+
     print "* Helper: Images Loaded at: {}".format(output_path)
     return output_path
-
\ No newline at end of file
+
diff --git a/src/app/apis/InceptionV3/inceptionV3.py b/src/app/apis/InceptionV3/inceptionV3.py
index 8c2caa4..4bb2d9b 100644
--- a/src/app/apis/InceptionV3/inceptionV3.py
+++ b/src/app/apis/InceptionV3/inceptionV3.py
@@ -48,12 +48,14 @@ def label():
     s3_bucket_name = request.form.get('s3_bucket_name')
     s3_bucket_prefix = request.form.get('s3_bucket_prefix')
     model_name = request.form.get('model_name')
-
+    print "s3_bucket_name: {}".format(s3_bucket_name)
+    print "s3_bucket_prefix: {}".format(s3_bucket_prefix)
+    print "model_name: {}".format(model_name)
     # load image from s3
     image_data_path = API_helpers.download_a_dir_from_s3(s3_bucket_name,
                                                          s3_bucket_prefix,
                                                          local_path = TEMP_FOLDER)
-
+    print "image_data_path: {}".format(image_data_path)
     # for each images in the folder
     # supports .png and .jpg
     all_image_ids = []
@@ -116,12 +118,16 @@ def retrain():
     @args: train_bucket_url: URL pointing to the folder for training data on S3
     @args: model_name: the name of the model to be retrained; the folder must exist
     """
+    print "retrain line 1"
    s3_bucket_name = request.form.get('train_bucket_name')
+    print "s3 bucket name = {}".format(s3_bucket_name)
     s3_bucket_prefix = request.form.get('train_bucket_prefix')
+    print "s3 bucket prefix = {}".format(s3_bucket_prefix)
     nb_epoch = request.form.get('nb_epoch')
     batch_size = request.form.get('batch_size')
     model_name = s3_bucket_prefix.split('/')[-1]
+    print "model_name = {}".format(model_name)
     local_data_path = os.path.join('./tmp')
 
     # create a celery task id
@@ -129,7 +135,7 @@ def retrain():
     # download the folder in the url
     # return the path of the image files
     async_retrain.apply_async((model_name,
-                               local_data_path,
+                               # local_data_path,
                                s3_bucket_name,
                                s3_bucket_prefix,
                                nb_epoch,
@@ -182,9 +188,12 @@ def run_inceptionV3():
 
     # load model name
     model_name = request.form.get('model_name')
+    print "model_name = {}, type = {}".format(model_name, type(model_name))
 
     # load and pre-processing image
     img = request.files['image']
+    print "image type = {}".format(type(img))
+
     img = image.load_img(img, target_size = (299, 299))
     x = np.expand_dims(image.img_to_array(img), axis=0)
     x = preprocess_input(x)
@@ -226,4 +235,4 @@
 
 
 
-
\ No newline at end of file
+
diff --git a/src/app/apis/tasks/remote_tasks.py b/src/app/apis/tasks/remote_tasks.py
index 4801508..ec56668 100644
--- a/src/app/apis/tasks/remote_tasks.py
+++ b/src/app/apis/tasks/remote_tasks.py
@@ -12,12 +12,13 @@ def task_info():
     check the progress of the remote tasks by its id
     """
     task_ids = request.get_json()
+    print "task_ids = {}".format(task_ids)
     task_results = []
 
     for each_id in task_ids:
         this_task = michaniki_celery_app.AsyncResult(each_id)
         this_status = str(this_task.state)
-
+        print "this_status = {}".format(this_status)
         if this_status == "SUCCESS":
             # get the training and validation acc
             this_res = this_task.get()
@@ -50,4 +51,4 @@ def cancel_task():
             "remote_task_id": task_id,
             "status": "REVOKED"
         }
-    })
\ No newline at end of file
+    })
diff --git a/src/app/tasks.py b/src/app/tasks.py
index 009fd06..dce9915 100644
--- a/src/app/tasks.py
+++ b/src/app/tasks.py
@@ -32,7 +32,7 @@ def async_retrain(model_name,
     retrain model
     resume training
     """
-    # download image data to local 
+    # download image data to local
     image_data_path = API_helpers.download_a_dir_from_s3(s3_bucket_name,
                                                          s3_bucket_prefix,
                                                          local_path = TEMP_FOLDER)
@@ -108,4 +108,4 @@ def async_transfer(model_name,
         shutil.rmtree(image_data_path, ignore_errors=True)
         raise
 
-
\ No newline at end of file
+
diff --git a/train.sh b/train.sh
new file mode 100644
index 0000000..dda2279
--- /dev/null
+++ b/train.sh
@@ -0,0 +1,9 @@
+# kick off transfer learning from the S3 prefix holding train/ and val/ data
+curl -X POST \
+  http://127.0.0.1:3031/inceptionV3/transfer \
+  -F train_bucket_name=insightai2019 \
+  -F train_bucket_prefix=models/tomato_potato
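+
+# Usage sketch (assumes the server is up and the S3 prefix above already
+# holds train/ and val/ folders, e.g. uploaded via sherlockWrapper.upload_to_S3):
+#   bash train.sh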