From 9c723ca983b7eb188e9e8900e516e7f11949cbc4 Mon Sep 17 00:00:00 2001 From: orisenbazuru Date: Thu, 10 Jun 2021 16:47:37 +0200 Subject: [PATCH] =?UTF-8?q?update=20for=20paper=20review=20=F0=9F=93=83?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- .gitignore | 6 +- README.md | 14 +- ddi/dataset.py | 28 +- ddi/model_attn_siamese.py | 31 +- ddi/run_workflow.py | 50 ++- ddi/utilities.py | 102 +---- .../AttnWSiamese_data_generation.ipynb | 52 ++- .../jupyter/AttnWSiamese_hyperparam.ipynb | 392 ++++-------------- .../jupyter/AttnWSiamese_train_eval.ipynb | 92 ++-- 9 files changed, 288 insertions(+), 479 deletions(-) diff --git a/.gitignore b/.gitignore index f4a5368..663c0ab 100644 --- a/.gitignore +++ b/.gitignore @@ -51,5 +51,7 @@ venv.bak/ # orisenbazuru explore.py notebooks/orisenbazuru/* -cluster/data/medinfmk/ddi/processed/* -ideas.txt.rtf \ No newline at end of file +notebooks/archive/* +ideas.txt.rtf +trained_models +data/processed \ No newline at end of file diff --git a/README.md b/README.md index 43f97ba..25eca29 100644 --- a/README.md +++ b/README.md @@ -1,6 +1,16 @@ -# side-effects +# 📣 AttentionDDI 💊 + +This repository contains the code for the AttentionDDI model implementation with PyTorch. + +AttentionDDI is a Siamese multi-head self-Attention multi-modal neural network model used for drug-drug interaction (DDI) predictions. ## Installation * `git clone` the repo and `cd` into it. -* Run `pip install -e .` to install the repo's python package. \ No newline at end of file +* Run `pip install -e .` to install the repo's python package. + +## Running 🏃 + +1. use `notebooks/jupyter/AttnWSiamese_data_generation.ipynb` to generate DataTensors from the drug similarity matrices. +2. use `notebooks/jupyter/AttnWSiamese_hyperparam.ipynb` to find the best performing model hyperparameters. +3. use `notebooks/jupyter/AttnWSiamese_train_eval.ipynb` to train / test on the best hyperparameters. \ No newline at end of file diff --git a/ddi/dataset.py b/ddi/dataset.py index 5185113..7b426ef 100644 --- a/ddi/dataset.py +++ b/ddi/dataset.py @@ -12,7 +12,7 @@ class DDIDataTensor(Dataset): - def __init__(self, y, X_a, X_b): + def __init__(self, X_a, X_b, y): self.X_a = X_a # tensor.float32, (drug pairs, features) self.X_b = X_b # tensor.float32, (drug pairs, features) @@ -46,7 +46,7 @@ def __len__(self): class PartitionDataTensor(Dataset): - def __init__(self, ddi_datatensor, gip_datatensor, partition_ids, dsettype, fold_num, is_siamese): + def __init__(self, ddi_datatensor, gip_datatensor, partition_ids, dsettype, fold_num, is_siamese=True): self.ddi_datatensor = ddi_datatensor # instance of :class:`DDIDataTensor` self.gip_datatensor = gip_datatensor # instance of :class:`GIPDataTensor` self.partition_ids = partition_ids # list of indices for drug pairs @@ -60,6 +60,7 @@ def __getitem__(self, indx): X_a_gip, X_b_gip, gip_indx = self.gip_datatensor[target_id] # combine gip with other matrices X_a, X_b, y, ddi_indx = self.ddi_datatensor[target_id] + # (sim_types, features) X_a_comb = torch.cat([X_a, X_a_gip], axis=0) X_b_comb = torch.cat([X_b, X_b_gip], axis=0) X_comb = torch.cat([X_a_comb, X_b_comb])#.view(-1) @@ -187,27 +188,6 @@ def get_y_from_interactionmat(interaction_mat): # c_comb = c.tolist() + cl.tolist() # return interaction_mat[r_comb,c_comb] -def compute_gip_profile(adj, bw=1.): - """approach based on Olayan et al. 
https://doi.org/10.1093/bioinformatics/btx731 """ - - ga = np.dot(adj,np.transpose(adj)) - ga = bw*ga/np.mean(np.diag(ga)) - di = np.diag(ga) - x = np.tile(di,(1,di.shape[0])).reshape(di.shape[0],di.shape[0]) - d =x+np.transpose(x)-2*ga - return np.exp(-d) - -def compute_kernel(mat, k_bandwidth, epsilon=1e-9): - """computes gaussian kernel from 2D matrix - - Approach based on van Laarhoven et al. doi:10.1093/bioinformatics/btr500 - - """ - r, c = mat.shape # 2D matrix - # computes pairwise l2 distance - dist_kernel = squareform(pdist(mat, metric='euclidean')**2) - gamma = k_bandwidth/(np.clip((scpnorm(mat, axis=1, keepdims=True)**2) * 1/c, a_min=epsilon, a_max=None)) - return np.exp(-gamma*dist_kernel) def construct_sampleid_ddipairs(interaction_mat): # take indices off the diagnoal by 1 @@ -339,7 +319,7 @@ def report_label_distrib(labels): print("class:", label, "norm count:", norm_counts[i]) -def generate_partition_datatensor(ddi_datatensor, gip_dtensor_perfold, data_partitions, is_siamese): +def generate_partition_datatensor(ddi_datatensor, gip_dtensor_perfold, data_partitions, is_siamese=True): datatensor_partitions = {} for fold_num in data_partitions: datatensor_partitions[fold_num] = {} diff --git a/ddi/model_attn_siamese.py b/ddi/model_attn_siamese.py index 842c532..3a945b4 100644 --- a/ddi/model_attn_siamese.py +++ b/ddi/model_attn_siamese.py @@ -58,16 +58,19 @@ def forward(self, X): Args: X: tensor, (batch, ddi similarity type vector, input_size) """ - + bsize, num_modal, inp_dim = X.shape + attn_tensor = X.new_zeros((bsize, num_modal, num_modal)) out = [] for SH_layer in self.multihead_pipeline: - z, __ = SH_layer(X) + z, attn_w_normalized = SH_layer(X) out.append(z) + attn_tensor += attn_w_normalized # concat on the feature dimension out = torch.cat(out, -1) + attn_tensor = attn_tensor/len(self.multihead_pipeline) # return a unified vector mapping of the different self-attention blocks - return self.Wz(out) + return self.Wz(out), attn_tensor class TransformerUnit(nn.Module): @@ -98,7 +101,7 @@ def forward(self, X): X: tensor, (batch, ddi similarity type vector, input_size) """ # z is tensor of size (batch, ddi similarity type vector, input_size) - z = self.multihead_attn(X) + z, attn_tensor = self.multihead_attn(X) # layer norm with residual connection z = self.layernorm_1(z + X) z = self.dropout(z) @@ -106,7 +109,7 @@ def forward(self, X): z = self.layernorm_2(z_ff + z) z = self.dropout(z) - return z + return z, attn_tensor class FeatureEmbAttention(nn.Module): def __init__(self, input_dim): @@ -167,7 +170,7 @@ def __init__(self, input_size=586, input_embed_dim=64, num_attn_heads=8, mlp_emb self.Wembed = nn.Linear(input_size, embed_size) trfunit_layers = [TransformerUnit(embed_size, num_attn_heads, mlp_embed_factor, nonlin_func, pdropout) for i in range(num_transformer_units)] - self.trfunit_pipeline = nn.Sequential(*trfunit_layers) + self.trfunit_pipeline = nn.ModuleList(trfunit_layers) self.pooling_mode = pooling_mode if pooling_mode == 'attn': @@ -187,9 +190,15 @@ def forward(self, X): X: tensor, (batch, ddi similarity type vector, input_size) """ - # X = self.Wembed(X) # mean pooling TODO: add global attention layer or other pooling strategy - z = self.trfunit_pipeline(X) + bsize, num_modal, inp_dim = X.shape + attn_tensor = X.new_zeros((bsize, num_modal, num_modal)) + xinput = X + for encunit in self.trfunit_pipeline: + z, attn_h_tensor = encunit(xinput) + xinput = z + attn_tensor += attn_h_tensor + attn_tensor = attn_tensor/len(self.trfunit_pipeline) # pool across 
similarity type vectors # Note: z.mean(dim=1) will change shape of z to become (batch, input_size) @@ -204,7 +213,7 @@ def forward(self, X): z = self.pooling(z, dim=1) fattn_w_norm = None - return z, fattn_w_norm + return z, fattn_w_norm, attn_tensor class DDI_SiameseTrf(nn.Module): @@ -226,9 +235,7 @@ def __init__(self, input_dim, dist, num_classes=2): # perform log softmax on the feature dimension self.log_softmax = nn.LogSoftmax(dim=-1) - self._init_params_() - print('updated') - + self._init_params_() def _init_params_(self): _init_model_params(self.named_parameters()) diff --git a/ddi/run_workflow.py b/ddi/run_workflow.py index 14518f5..d9d2be6 100644 --- a/ddi/run_workflow.py +++ b/ddi/run_workflow.py @@ -184,7 +184,7 @@ def run_ddi(data_partition, dsettypes, config, options, wrk_dir, else: class_weights = torch.tensor([1]*2).type(fdtype).to(device) # weighting all casess equally - print("class weights", class_weights) + # print("class weights", class_weights) # binary cross entropy loss_bce = torch.nn.BCEWithLogitsLoss(pos_weight=class_weights, reduction='mean') loss_nlll = torch.nn.NLLLoss(weight=class_weights, reduction='mean') # negative log likelihood loss @@ -354,7 +354,7 @@ def run_ddiTrf(data_partition, dsettypes, config, options, wrk_dir, cld = construct_load_dataloaders(data_partition, dsettypes, dataloader_config, wrk_dir) # dictionaries by dsettypes data_loaders, epoch_loss_avgbatch, score_dict, class_weights, flog_out = cld - print(flog_out) + # print(flog_out) device = get_device(to_gpu, gpu_index) # gpu device fdtype = options['fdtype'] @@ -363,7 +363,7 @@ def run_ddiTrf(data_partition, dsettypes, config, options, wrk_dir, else: class_weights = torch.tensor([1]*2).type(fdtype).to(device) # weighting all casess equally - print("class weights", class_weights) + # print("class weights", class_weights) loss_func = torch.nn.NLLLoss(weight=class_weights, reduction='mean') # negative log likelihood loss loss_contrastive = ContrastiveLoss(options.get('contrastiveloss_margin', 0.5), reduction='mean') loss_contrastive.type(fdtype).to(device) @@ -401,10 +401,9 @@ def run_ddiTrf(data_partition, dsettypes, config, options, wrk_dir, for m, m_name in models: m.type(fdtype).to(device) - print('cool') if('train' in data_loaders): weight_decay = options.get('weight_decay', 1e-4) - print('weight_decay', weight_decay) + # print('weight_decay', weight_decay) # split model params into attn parameters and other params # models_param = add_weight_decay_except_attn([ddi_model, ddi_siamese], weight_decay) # see paper Cyclical Learning rates for Training Neural Networks for parameters' choice @@ -415,7 +414,7 @@ def run_ddiTrf(data_partition, dsettypes, config, options, wrk_dir, c_step_size = int(np.ceil(5*num_iter)) # this should be 2-10 times num_iter base_lr = 3e-4 max_lr = 5*base_lr # 3-5 times base_lr - print('max lr', max_lr) + # print('max lr', max_lr) optimizer = torch.optim.Adam(models_param, weight_decay=weight_decay, lr=base_lr) cyc_scheduler = torch.optim.lr_scheduler.CyclicLR(optimizer, base_lr, max_lr, step_size_up=c_step_size, mode='triangular', cycle_momentum=False) @@ -432,6 +431,8 @@ def run_ddiTrf(data_partition, dsettypes, config, options, wrk_dir, ReaderWriter.dump_data(options, os.path.join(config_dir, 'exp_options.pkl')) # store attention weights for validation and test set seqid_fattnw_map = {dsettype: {'X_a':{}, 'X_b':{}} for dsettype in data_loaders if dsettype in {'test'}} + seqid_hattnw_map = {dsettype: {'X_a':{}, 'X_b':{}} for dsettype in data_loaders if dsettype in 
{'test'}} + pair_names = ('a', 'b') for epoch in range(num_epochs): @@ -469,13 +470,17 @@ def run_ddiTrf(data_partition, dsettypes, config, options, wrk_dir, with torch.set_grad_enabled(dsettype == 'train'): num_samples_perbatch = X_a.size(0) - z_a, fattn_w_scores_a = ddi_model(X_a) - z_b, fattn_w_scores_b = ddi_model(X_b) + z_a, fattn_w_scores_a, hattn_w_scores_a = ddi_model(X_a) + z_b, fattn_w_scores_b, hattn_w_scores_b = ddi_model(X_b) if(dsettype in seqid_fattnw_map and model_config.pooling_mode == 'attn'): for l, attn_scores in enumerate((fattn_w_scores_a, fattn_w_scores_b)): suffix = pair_names[l] seqid_fattnw_map[dsettype][f'X_{suffix}'].update({sid.item():attn_scores[c].detach().cpu() for c, sid in enumerate(ids)}) + + for l, attn_scores in enumerate((hattn_w_scores_a, hattn_w_scores_b)): + suffix = pair_names[l] + seqid_hattnw_map[dsettype][f'X_{suffix}'].update({sid.item():attn_scores[c].detach().cpu() for c, sid in enumerate(ids)}) logsoftmax_scores, dist = ddi_siamese(z_a, z_b) @@ -520,6 +525,7 @@ def run_ddiTrf(data_partition, dsettypes, config, options, wrk_dir, elif(dsettype == 'test'): # dump attention weights for the test data dump_dict_content(seqid_fattnw_map, ['test'], 'sampleid_fattnw_map', wrk_dir) + dump_dict_content(seqid_hattnw_map, ['test'], 'sampleid_hattnw_map', wrk_dir) if dsettype in {'test', 'validation'}: predictions_df = build_predictions_df(ddi_ids, ref_class, pred_class, prob_scores_arr) predictions_path = os.path.join(wrk_dir, f'predictions_{dsettype}.csv') @@ -632,7 +638,7 @@ def get_best_config_from_hyperparamsearch(hyperparam_search_dir, num_folds=5, nu if(os.path.isfile(score_file)): try: mscore = ReaderWriter.read_data(score_file) - print(mscore) + # print(mscore) scores[config_num, 0] = mscore.best_epoch_indx scores[config_num, 1] = mscore.s_precision scores[config_num, 2] = mscore.s_recall @@ -677,9 +683,13 @@ def train_val_run(datatensor_partitions, config_map, train_val_dir, fold_gpu_map state_dict_dir=None, to_gpu=True, gpu_index=fold_gpu_map[fold_num]) - - -def test_run(datatensor_partitions, config_map, train_val_dir, test_dir, fold_gpu_map, num_epochs=1): +def test_run(datatensor_partitions, + config_map, + train_val_dir, + test_dir, + fold_gpu_map, + suffix_testfname=None, + num_epochs=1): dsettypes = ['test'] mconfig, options = config_map options['num_epochs'] = num_epochs # override number of epochs using user specified value @@ -692,7 +702,10 @@ def test_run(datatensor_partitions, config_map, train_val_dir, test_dir, fold_gp if os.path.exists(train_dir): # load state_dict pth state_dict_pth = os.path.join(train_dir, 'model_statedict') - path = os.path.join(test_dir, 'test', 'fold_{}'.format(fold_num)) + if suffix_testfname: + path = os.path.join(test_dir, f'test_{suffix_testfname}', 'fold_{}'.format(fold_num)) + else: + path = os.path.join(test_dir, 'test', 'fold_{}'.format(fold_num)) test_wrk_dir = create_directory(path) if options.get('loss_func') == 'bceloss': run_ddi(data_partition, dsettypes, mconfig, options, test_wrk_dir, @@ -711,6 +724,17 @@ def train_test_partition(datatensor_partition, config_map, tr_val_dir, fold_gpu_ train_val_run(datatensor_partition, config_map, tr_val_dir, fold_gpu_map, num_epochs=config_epochs) test_run(datatensor_partition, config_map, tr_val_dir, tr_val_dir, fold_gpu_map, num_epochs=1) +def test_partition(datatensor_partition, config_map, tr_val_dir, fold_gpu_map, suffix_testfname): + config_epochs = config_map[0]['model_config'].num_epochs + print(config_epochs) + test_run(datatensor_partition, + 
config_map, + tr_val_dir, + tr_val_dir, + fold_gpu_map, + suffix_testfname=suffix_testfname, + num_epochs=1) + def train_test_hyperparam_conf(hyperparam_comb, gpu_num, datatensor_partition, fold_gpu_map, exp_dir, num_drugs, queue, exp_iden): text_to_save = str(hyperparam_comb) print("hyperparam_comb:", text_to_save, "gpu num:", str(gpu_num)) diff --git a/ddi/utilities.py b/ddi/utilities.py index 7429d2f..13a99ed 100644 --- a/ddi/utilities.py +++ b/ddi/utilities.py @@ -25,7 +25,7 @@ def __repr__(self): "".format(self.best_epoch_indx, self.s_auc, self.s_aupr, self.s_f1, self.s_precision, self.s_recall) return desc -def get_performance_results(similarity_type, target_dir, num_folds, dsettype): +def get_performance_results(similarity_type, target_dir, num_folds, dsettype, suffix_testfname=None): all_perf = {} num_metrics = 3 # number of metrics to focus on perf_dict = [{} for i in range(num_metrics)] # track auc, aupr, f1 measure @@ -33,13 +33,18 @@ def get_performance_results(similarity_type, target_dir, num_folds, dsettype): prefix = 'train_val' else: prefix = dsettype + if suffix_testfname: + prefix = prefix + "_" + suffix_testfname + for fold_num in range(num_folds): fold_dir = os.path.join(target_dir, '{}'.format(prefix), 'fold_{}'.format(fold_num)) + # print('fold_dir:', fold_dir) score_file = os.path.join(fold_dir, 'score_{}.pkl'.format(dsettype)) + # print(score_file) if os.path.isfile(score_file): mscore = ReaderWriter.read_data(score_file) @@ -60,14 +65,22 @@ def get_performance_results(similarity_type, target_dir, num_folds, dsettype): return perf_df -def build_performance_dfs(similarity_types, target_dir, num_folds, dsettype): +def build_performance_dfs(similarity_types, target_dir, num_folds, dsettype, suffix_testfname=None): auc_df = pd.DataFrame() aupr_df = pd.DataFrame() f1_df = pd.DataFrame() target_dir = create_directory(target_dir, directory="parent") print(target_dir) for sim_type in similarity_types: - s_auc, s_aupr, s_f1 = get_performance_results(sim_type, target_dir, num_folds, dsettype) + if suffix_testfname is not None: + suff_testfname = suffix_testfname + sim_type + else: + suff_testfname = None + s_auc, s_aupr, s_f1 = get_performance_results(sim_type, + target_dir, + num_folds, + dsettype, + suffix_testfname=suff_testfname) auc_df = pd.concat([auc_df, s_auc], sort=True) aupr_df = pd.concat([aupr_df, s_aupr], sort=True) f1_df = pd.concat([f1_df, s_f1], sort=True) @@ -277,7 +290,6 @@ def plot_loss(epoch_loss_avgbatch, wrk_dir): plt.savefig(os.path.join(wrk_dir, os.path.join(dsettype + ".pdf"))) plt.close() - def plot_xy(x, y, xlabel, ylabel, legend, fname, wrk_dir): plt.figure(figsize=(9, 6)) plt.plot(x, y, 'r') @@ -288,92 +300,10 @@ def plot_xy(x, y, xlabel, ylabel, legend, fname, wrk_dir): plt.savefig(os.path.join(wrk_dir, os.path.join(fname + ".pdf"))) plt.close() -def find_youdenj_threshold(ref_target, prob_poslabel, fig_dir=None): - fpr, tpr, thresholds = roc_curve(ref_target, prob_poslabel) - s_auc = roc_auc_score(ref_target, prob_poslabel) - thresholds[0] = 1 - plt.figure(figsize=(9, 6)) - plt.plot(fpr, tpr, 'b+', label=f'TPR vs FPR => AUC:{s_auc:.2}') - plt.xlabel('False positive rate') - plt.ylabel('True positive rate') - plt.title('ROC curve') - youden_indx = np.argmax(tpr - fpr) # the index where the difference between tpr and fpr is max - optimal_threshold = thresholds[youden_indx] - plt.plot(fpr[youden_indx], tpr[youden_indx], marker='o', markersize=3, color="red", label=f'optimal probability threshold:{optimal_threshold:.2}') - plt.legend(loc='best') - 
if fig_dir: - plt.savefig(f'{fig_dir}.pdf') - plt.close() - return fpr, tpr, thresholds, optimal_threshold - -def analyze_precision_recall_curve(ref_target, prob_poslabel, fig_dir=None): - pr, rec, thresholds = precision_recall_curve(ref_target, prob_poslabel) - avg_precision = average_precision_score(ref_target, prob_poslabel) - thresholds[0] = 1 - plt.figure(figsize=(9, 6)) - plt.plot(rec, pr, 'b+', label=f'Precision vs Recall => Average Precision (AP):{avg_precision:.2}') - plt.xlabel('Recall') - plt.ylabel('Precision') - plt.title('Precision vs. recall curve') - indx = np.argmax(pr + rec) - print('indx', indx) - optimal_threshold = thresholds[indx] - plt.plot(rec[indx], pr[indx], marker='o', markersize=3, color="red", label=f'optimal probability threshold:{optimal_threshold:.2}') - plt.legend(loc='best') - if fig_dir: - plt.savefig(f'{fig_dir}.pdf') - plt.close() - return pr, rec, thresholds, optimal_threshold - def delete_directory(directory): if(os.path.isdir(directory)): shutil.rmtree(directory) - -# code from keras https://github.com/keras-team/keras/blob/master/keras/utils/np_utils.py -def to_categorical(y, num_classes=None, dtype='float32'): - """Converts a class vector (integers) to binary class matrix. - E.g. for use with categorical_crossentropy. - # Arguments - y: class vector to be converted into a matrix - (integers from 0 to num_classes). - num_classes: total number of classes. - dtype: The data type expected by the input, as a string - (`float32`, `float64`, `int32`...) - # Returns - A binary matrix representation of the input. The classes axis - is placed last. - # Example - ```python - # Consider an array of 5 labels out of a set of 3 classes {0, 1, 2}: - > labels - array([0, 2, 1, 2, 0]) - # `to_categorical` converts this into a matrix with as many - # columns as there are classes. The number of rows - # stays the same. 
- > to_categorical(labels) - array([[ 1., 0., 0.], - [ 0., 0., 1.], - [ 0., 1., 0.], - [ 0., 0., 1.], - [ 1., 0., 0.]], dtype=float32) - ``` - """ - - y = np.array(y, dtype='int') - input_shape = y.shape - if input_shape and input_shape[-1] == 1 and len(input_shape) > 1: - input_shape = tuple(input_shape[:-1]) - y = y.ravel() - if not num_classes: - num_classes = np.max(y) + 1 - n = y.shape[0] - categorical = np.zeros((n, num_classes), dtype=dtype) - categorical[np.arange(n), y] = 1 - output_shape = input_shape + (num_classes,) - categorical = np.reshape(categorical, output_shape) - return categorical - def format_bytes(size): # 2**10 = 1024 power = 2**10 diff --git a/notebooks/jupyter/AttnWSiamese_data_generation.ipynb b/notebooks/jupyter/AttnWSiamese_data_generation.ipynb index 2900422..dd18ab0 100644 --- a/notebooks/jupyter/AttnWSiamese_data_generation.ipynb +++ b/notebooks/jupyter/AttnWSiamese_data_generation.ipynb @@ -18,8 +18,10 @@ "metadata": {}, "outputs": [], "source": [ - "import ddi\n", - "import sys" + "import os\n", + "import sys\n", + "# Provide access to modules in repo.\n", + "sys.path.insert(0, os.path.abspath('../../'))" ] }, { @@ -32,6 +34,7 @@ "import pandas as pd\n", "import datetime\n", "import seaborn as sns\n", + "import ddi\n", "from ddi.dataset import *" ] }, @@ -90,11 +93,12 @@ "metadata": {}, "outputs": [], "source": [ - "DSdataset_name = 'DS3' # or DS2, DS3\n", + "DSdataset_name = 'DS3' # or DDS2, DS3\n", "\n", "# For DS3:\n", - "# interact_matfname_DS3 = 'NCRDInteractionMat'\n", - "interact_matfname_DS3 = 'CRDInteractionMat'" + "if DSdataset_name == 'DS3':\n", + "# interact_matfname_DS3 = 'NCRDInteractionMat'\n", + " interact_matfname_DS3 = 'CRDInteractionMat'" ] }, { @@ -233,6 +237,24 @@ "dpartitions = get_stratified_partitions(y, num_folds=10, valid_set_portion=0.1, random_state=42)" ] }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "targetdata_dir = create_directory(exp_iden, os.path.join(up_dir, processed_dir, DSdataset_name, data_fname))" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "targetdata_dir" + ] + }, { "cell_type": "code", "execution_count": null, @@ -516,6 +538,15 @@ "targetdata_dir" ] }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "y_tensor.shape, X_a.shape, X_b.shape" + ] + }, { "cell_type": "code", "execution_count": null, @@ -539,7 +570,7 @@ "cell_type": "code", "execution_count": null, "metadata": { - "scrolled": true + "scrolled": false }, "outputs": [], "source": [ @@ -569,6 +600,13 @@ "# dump data on disk\n", "ReaderWriter.dump_tensor(gip_dtensor_perfold, os.path.join(targetdata_dir, 'gip_dtensor_perfold.torch'))" ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [] } ], "metadata": { @@ -587,7 +625,7 @@ "name": "python", "nbconvert_exporter": "python", "pygments_lexer": "ipython3", - "version": "3.6.10" + "version": "3.8.5" } }, "nbformat": 4, diff --git a/notebooks/jupyter/AttnWSiamese_hyperparam.ipynb b/notebooks/jupyter/AttnWSiamese_hyperparam.ipynb index 51873e8..46170f8 100644 --- a/notebooks/jupyter/AttnWSiamese_hyperparam.ipynb +++ b/notebooks/jupyter/AttnWSiamese_hyperparam.ipynb @@ -2,7 +2,7 @@ "cells": [ { "cell_type": "code", - "execution_count": 1, + "execution_count": null, "metadata": { "scrolled": true }, @@ -14,17 +14,19 @@ }, { "cell_type": "code", - "execution_count": 2, + 
"execution_count": null, "metadata": {}, "outputs": [], "source": [ - "import ddi\n", - "import sys" + "import os\n", + "import sys\n", + "# Provide access to modules in repo.\n", + "sys.path.insert(0, os.path.abspath('../../'))" ] }, { "cell_type": "code", - "execution_count": 3, + "execution_count": null, "metadata": {}, "outputs": [], "source": [ @@ -32,12 +34,13 @@ "import pandas as pd\n", "import datetime\n", "import seaborn as sns\n", + "import ddi\n", "from ddi.dataset import *" ] }, { "cell_type": "code", - "execution_count": 4, + "execution_count": null, "metadata": {}, "outputs": [], "source": [ @@ -47,7 +50,7 @@ }, { "cell_type": "code", - "execution_count": 5, + "execution_count": null, "metadata": {}, "outputs": [], "source": [ @@ -56,7 +59,7 @@ }, { "cell_type": "code", - "execution_count": 6, + "execution_count": null, "metadata": {}, "outputs": [], "source": [ @@ -67,93 +70,18 @@ }, { "cell_type": "code", - "execution_count": 7, + "execution_count": null, "metadata": {}, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "number of GPUs available: 8\n", - "cuda:0, name:GeForce GTX 1080 Ti\n", - "total memory available: 10.91650390625 GB\n", - "total memory allocated on device: 0.0 GB\n", - "max memory allocated on device: 0.0 GB\n", - "total memory cached on device: 0.0 GB\n", - "max memory cached on device: 0.0 GB\n", - "\n", - "cuda:1, name:GeForce GTX 1080 Ti\n", - "total memory available: 10.91650390625 GB\n", - "total memory allocated on device: 0.0 GB\n", - "max memory allocated on device: 0.0 GB\n", - "total memory cached on device: 0.0 GB\n", - "max memory cached on device: 0.0 GB\n", - "\n", - "cuda:2, name:GeForce GTX 1080 Ti\n", - "total memory available: 10.91650390625 GB\n", - "total memory allocated on device: 0.0 GB\n", - "max memory allocated on device: 0.0 GB\n", - "total memory cached on device: 0.0 GB\n", - "max memory cached on device: 0.0 GB\n", - "\n", - "cuda:3, name:GeForce GTX 1080 Ti\n", - "total memory available: 10.91650390625 GB\n", - "total memory allocated on device: 0.0 GB\n", - "max memory allocated on device: 0.0 GB\n", - "total memory cached on device: 0.0 GB\n", - "max memory cached on device: 0.0 GB\n", - "\n", - "cuda:4, name:GeForce GTX 1080 Ti\n", - "total memory available: 10.91650390625 GB\n", - "total memory allocated on device: 0.0 GB\n", - "max memory allocated on device: 0.0 GB\n", - "total memory cached on device: 0.0 GB\n", - "max memory cached on device: 0.0 GB\n", - "\n", - "cuda:5, name:GeForce GTX 1080 Ti\n", - "total memory available: 10.91650390625 GB\n", - "total memory allocated on device: 0.0 GB\n", - "max memory allocated on device: 0.0 GB\n", - "total memory cached on device: 0.0 GB\n", - "max memory cached on device: 0.0 GB\n", - "\n", - "cuda:6, name:GeForce GTX 1080 Ti\n", - "total memory available: 10.91650390625 GB\n", - "total memory allocated on device: 0.0 GB\n", - "max memory allocated on device: 0.0 GB\n", - "total memory cached on device: 0.0 GB\n", - "max memory cached on device: 0.0 GB\n", - "\n", - "cuda:7, name:GeForce GTX 1080 Ti\n", - "total memory available: 10.91650390625 GB\n", - "total memory allocated on device: 0.0 GB\n", - "max memory allocated on device: 0.0 GB\n", - "total memory cached on device: 0.0 GB\n", - "max memory cached on device: 0.0 GB\n", - "\n" - ] - } - ], + "outputs": [], "source": [ "report_available_cuda_devices()" ] }, { "cell_type": "code", - "execution_count": 8, + "execution_count": null, "metadata": {}, - "outputs": [ - { - "data": { - 
"text/plain": [ - "8" - ] - }, - "execution_count": 8, - "metadata": {}, - "output_type": "execute_result" - } - ], + "outputs": [], "source": [ "n_gpu = torch.cuda.device_count()\n", "n_gpu" @@ -163,25 +91,26 @@ "cell_type": "markdown", "metadata": {}, "source": [ - "### Preparing dataset " + "## Loading dataset " ] }, { "cell_type": "code", - "execution_count": 9, + "execution_count": null, "metadata": {}, "outputs": [], "source": [ - "DSdataset_name = 'DS3' # or DS2, DS3\n", + "DSdataset_name = 'DS1' # or DS2, DS3\n", "\n", "# For DS3:\n", - "interact_matfname_DS3 = 'NCRDInteractionMat'\n", - "# interact_matfname_DS3 = 'CRDInteractionMat'" + "if DSdataset_name == 'DS3':\n", + "# interact_matfname_DS3 = 'NCRDInteractionMat'\n", + " interact_matfname_DS3 = 'CRDInteractionMat'" ] }, { "cell_type": "code", - "execution_count": 10, + "execution_count": null, "metadata": {}, "outputs": [], "source": [ @@ -228,7 +157,7 @@ }, { "cell_type": "code", - "execution_count": 11, + "execution_count": null, "metadata": {}, "outputs": [], "source": [ @@ -251,7 +180,7 @@ }, { "cell_type": "code", - "execution_count": 12, + "execution_count": null, "metadata": {}, "outputs": [], "source": [ @@ -260,7 +189,7 @@ }, { "cell_type": "code", - "execution_count": 13, + "execution_count": null, "metadata": {}, "outputs": [], "source": [ @@ -269,7 +198,7 @@ }, { "cell_type": "code", - "execution_count": 14, + "execution_count": null, "metadata": {}, "outputs": [], "source": [ @@ -285,17 +214,9 @@ }, { "cell_type": "code", - "execution_count": 16, + "execution_count": null, "metadata": {}, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "path_current_dir ../../data/processed/DS3/data_v1\n" - ] - } - ], + "outputs": [], "source": [ "# read data from disk\n", "device_cpu = get_device(to_gpu=False)\n", @@ -314,12 +235,12 @@ "cell_type": "markdown", "metadata": {}, "source": [ - "### Run from here" + "### Genearte data tensors" ] }, { "cell_type": "code", - "execution_count": 17, + "execution_count": null, "metadata": {}, "outputs": [], "source": [ @@ -328,7 +249,7 @@ }, { "cell_type": "code", - "execution_count": 18, + "execution_count": null, "metadata": {}, "outputs": [], "source": [ @@ -337,93 +258,11 @@ }, { "cell_type": "code", - "execution_count": 19, + "execution_count": null, "metadata": { "scrolled": true }, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "fold_num:0, dsettype:train\n", - "ID(PartitionDataTensor) 47661235389272\n", - "ID(DDIDataTensor) 47661235390896\n", - "ID(GIPDataTensor) 47661235390784\n", - "\n", - "fold_num:0, dsettype:validation\n", - "ID(PartitionDataTensor) 47661235391064\n", - "ID(DDIDataTensor) 47661235390896\n", - "ID(GIPDataTensor) 47661235390784\n", - "\n", - "fold_num:0, dsettype:test\n", - "ID(PartitionDataTensor) 47661235389384\n", - "ID(DDIDataTensor) 47661235390896\n", - "ID(GIPDataTensor) 47661235390784\n", - "\n", - "fold_num:1, dsettype:train\n", - "ID(PartitionDataTensor) 47661235390728\n", - "ID(DDIDataTensor) 47661235390896\n", - "ID(GIPDataTensor) 47661235391176\n", - "\n", - "fold_num:1, dsettype:validation\n", - "ID(PartitionDataTensor) 47661235391400\n", - "ID(DDIDataTensor) 47661235390896\n", - "ID(GIPDataTensor) 47661235391176\n", - "\n", - "fold_num:1, dsettype:test\n", - "ID(PartitionDataTensor) 47661235391456\n", - "ID(DDIDataTensor) 47661235390896\n", - "ID(GIPDataTensor) 47661235391176\n", - "\n", - "fold_num:2, dsettype:train\n", - "ID(PartitionDataTensor) 47661235391512\n", - 
"ID(DDIDataTensor) 47661235390896\n", - "ID(GIPDataTensor) 47661235391232\n", - "\n", - "fold_num:2, dsettype:validation\n", - "ID(PartitionDataTensor) 47661235391568\n", - "ID(DDIDataTensor) 47661235390896\n", - "ID(GIPDataTensor) 47661235391232\n", - "\n", - "fold_num:2, dsettype:test\n", - "ID(PartitionDataTensor) 47661235391624\n", - "ID(DDIDataTensor) 47661235390896\n", - "ID(GIPDataTensor) 47661235391232\n", - "\n", - "fold_num:3, dsettype:train\n", - "ID(PartitionDataTensor) 47661235391680\n", - "ID(DDIDataTensor) 47661235390896\n", - "ID(GIPDataTensor) 47661235391288\n", - "\n", - "fold_num:3, dsettype:validation\n", - "ID(PartitionDataTensor) 47661235391736\n", - "ID(DDIDataTensor) 47661235390896\n", - "ID(GIPDataTensor) 47661235391288\n", - "\n", - "fold_num:3, dsettype:test\n", - "ID(PartitionDataTensor) 47661235391792\n", - "ID(DDIDataTensor) 47661235390896\n", - "ID(GIPDataTensor) 47661235391288\n", - "\n", - "fold_num:4, dsettype:train\n", - "ID(PartitionDataTensor) 47661235391848\n", - "ID(DDIDataTensor) 47661235390896\n", - "ID(GIPDataTensor) 47661235391344\n", - "\n", - "fold_num:4, dsettype:validation\n", - "ID(PartitionDataTensor) 47661235391904\n", - "ID(DDIDataTensor) 47661235390896\n", - "ID(GIPDataTensor) 47661235391344\n", - "\n", - "fold_num:4, dsettype:test\n", - "ID(PartitionDataTensor) 47661235391960\n", - "ID(DDIDataTensor) 47661235390896\n", - "ID(GIPDataTensor) 47661235391344\n", - "\n" - ] - } - ], + "outputs": [], "source": [ "# confirm that we separate PartitionDataTensor object and same reference to DDIDataTensor object!\n", "for fold_num in datatensor_partitions:\n", @@ -439,12 +278,12 @@ "cell_type": "markdown", "metadata": {}, "source": [ - "### Train and Evaluate workflow" + "## Train and Evaluate workflow" ] }, { "cell_type": "code", - "execution_count": 20, + "execution_count": null, "metadata": {}, "outputs": [], "source": [ @@ -453,7 +292,7 @@ }, { "cell_type": "code", - "execution_count": 21, + "execution_count": null, "metadata": {}, "outputs": [], "source": [ @@ -467,30 +306,20 @@ }, { "cell_type": "code", - "execution_count": 22, + "execution_count": null, "metadata": {}, - "outputs": [ - { - "data": { - "text/plain": [ - "807" - ] - }, - "execution_count": 22, - "metadata": {}, - "output_type": "execute_result" - } - ], + "outputs": [], "source": [ "num_drugs" ] }, { "cell_type": "code", - "execution_count": 23, + "execution_count": null, "metadata": {}, "outputs": [], "source": [ + "# example of hyperparameter options to consider\n", "input_embed_dim = [128]\n", "num_attn_heads = [1,2]\n", "num_transformer_units = [1]\n", @@ -499,85 +328,55 @@ "mlp_embed_factor = [2]\n", "pooling_mode = ['attn']\n", "dist_opt = ['cosine']\n", - "l2_reg = [0,1e-8]\n", - "batch_size = [400]\n", - "num_epochs = [200]\n", - "loss_w = [0.05]" + "l2_reg = [0,1e-6, 1e-8]\n", + "batch_size = [400,1000]\n", + "num_epochs = [100, 200]\n", + "loss_w = [0.5, 0.05]" ] }, { "cell_type": "code", - "execution_count": 24, + "execution_count": null, "metadata": {}, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "8\n" - ] - } - ], - "source": [ - "hyperparam_space = list(itertools.product(*[input_embed_dim, num_attn_heads, num_transformer_units, p_dropout,\n", - " nonlin_func, mlp_embed_factor,pooling_mode,dist_opt, l2_reg, batch_size,\n", - " num_epochs, loss_w]))\n", - "print(len(hyperparam_space))" + "outputs": [], + "source": [ + "hyperparam_opt = (input_embed_dim,num_attn_heads, num_transformer_units, p_dropout, \n", + " nonlin_func, 
mlp_embed_factor, pooling_mode, dist_opt,\n", + " l2_reg, batch_size, num_epochs, loss_w)" ] }, { "cell_type": "code", - "execution_count": 29, + "execution_count": null, "metadata": {}, "outputs": [], "source": [ - "hyperparam_opt = (input_embed_dim,num_attn_heads, num_transformer_units, p_dropout, \n", - " nonlin_func, mlp_embed_factor, pooling_mode, dist_opt,\n", - " l2_reg, batch_size, num_epochs)\n" + "hyperparam_space = list(itertools.product(*hyperparam_opt))\n", + "print(len(hyperparam_space))" ] }, { "cell_type": "code", - "execution_count": 32, + "execution_count": null, "metadata": { "scrolled": true }, - "outputs": [ - { - "data": { - "text/plain": [ - "'../data/processed/DS3/experiments/simtypeall_NCRDInteractionMat'" - ] - }, - "execution_count": 32, - "metadata": {}, - "output_type": "execute_result" - } - ], + "outputs": [], "source": [ "exp_dir = create_directory(exp_iden, os.path.join(processed_dir, DSdataset_name, 'experiments'))\n", "exp_dir" ] }, - { - "cell_type": "code", - "execution_count": 33, - "metadata": {}, - "outputs": [], - "source": [ - "num_folds=10" - ] - }, { "cell_type": "markdown", "metadata": {}, "source": [ - "# Training" + "### Training" ] }, { "cell_type": "code", - "execution_count": 38, + "execution_count": null, "metadata": {}, "outputs": [], "source": [ @@ -589,8 +388,7 @@ " q_process.join()\n", " print(\"<<< joined hyperparam search process\")\n", " \n", - "def create_q_process(hyperparam_comb, gpu_num, datatensor_partition, exp_dir, num_drugs, queue, exp_iden):\n", - " fold_gpu_map = {0:gpu_num}\n", + "def create_q_process(hyperparam_comb, gpu_num, datatensor_partition, fold_gpu_map, exp_dir, num_drugs, queue, exp_iden):\n", " return mp.Process(target=ddi.run_workflow.train_test_hyperparam_conf, args=(hyperparam_comb, \n", " gpu_num, \n", " datatensor_partition, \n", @@ -603,51 +401,26 @@ }, { "cell_type": "code", - "execution_count": 40, + "execution_count": null, "metadata": {}, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - ">>> spawning hyperparam search process\n", - ">>> spawning hyperparam search process\n", - ">>> spawning hyperparam search process\n", - ">>> spawning hyperparam search process\n", - ">>> spawning hyperparam search process\n", - ">>> spawning hyperparam search process\n", - ">>> spawning hyperparam search process\n", - ">>> spawning hyperparam search process\n", - "<<< joined hyperparam search process\n", - "released_gpu_num: 3\n", - "<<< joined hyperparam search process\n", - "released_gpu_num: 2\n", - "<<< joined hyperparam search process\n", - "released_gpu_num: 0\n", - "<<< joined hyperparam search process\n", - "released_gpu_num: 1\n", - "<<< joined hyperparam search process\n", - "released_gpu_num: 5\n", - "<<< joined hyperparam search process\n", - "released_gpu_num: 7\n", - "<<< joined hyperparam search process\n", - "released_gpu_num: 4\n", - "<<< joined hyperparam search process\n", - "released_gpu_num: 6\n" - ] - } - ], + "outputs": [], "source": [ "import torch.multiprocessing as mp\n", "mp.set_start_method(\"spawn\", force=True)\n", "\n", "queue = mp.Queue()\n", "q_processes = []\n", + "num_hyper_options = len(hyperparam_space)\n", + "spawned_processes = min(n_gpu, num_hyper_options)\n", + "chosen_fold = 0\n", "\n", - "for q_i in range(min(n_gpu, len(hyperparam_space))):\n", + "for q_i in range(spawned_processes):\n", + " \n", + " fold_gpu_map = {chosen_fold:q_i}\n", " q_process = create_q_process(hyperparam_comb=hyperparam_space[q_i], \n", " gpu_num=q_i, \n", - " 
datatensor_partition={0:datatensor_partitions[0]}, \n", + " datatensor_partition={chosen_fold:datatensor_partitions[chosen_fold]},\n", + " fold_gpu_map=fold_gpu_map,\n", " exp_dir=exp_dir, \n", " num_drugs=num_drugs, \n", " queue=queue,\n", @@ -657,22 +430,31 @@ "\n", "spawned_processes = n_gpu\n", " \n", - "for q_i in range(len(hyperparam_space)):\n", + "for q_i in range(num_hyper_options):\n", " join_q_process(q_processes[q_i])\n", " released_gpu_num = queue.get()\n", " print(\"released_gpu_num:\", released_gpu_num)\n", - " if(spawned_processes < len(hyperparam_space)):\n", - " q_process = create_q_process(hyperparam_comb=hyperparam_space[spawned_processes], \n", - " gpu_num=released_gpu_num, \n", - " datatensor_partition={0:datatensor_partitions[0]}, \n", - " exp_dir=exp_dir, \n", - " num_drugs=num_drugs, \n", - " queue=queue,\n", - " exp_iden=exp_iden)\n", + " if(spawned_processes < num_hyper_options):\n", + " fold_gpu_map = {chosen_fold:released_gpu_num}\n", + " q_process = create_q_process(hyperparam_comb=hyperparam_space[spawned_processes],\n", + " gpu_num=released_gpu_num,\n", + " datatensor_partition={chosen_fold:datatensor_partitions[chosen_fold]},\n", + " fold_gpu_map=fold_gpu_map,\n", + " exp_dir=exp_dir, \n", + " num_drugs=num_drugs, \n", + " queue=queue,\n", + " exp_iden=exp_iden)\n", " q_processes.append(q_process)\n", " spawn_q_process(q_process)\n", " spawned_processes = spawned_processes + 1" ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [] } ], "metadata": { @@ -691,7 +473,7 @@ "name": "python", "nbconvert_exporter": "python", "pygments_lexer": "ipython3", - "version": "3.6.10" + "version": "3.8.5" } }, "nbformat": 4, diff --git a/notebooks/jupyter/AttnWSiamese_train_eval.ipynb b/notebooks/jupyter/AttnWSiamese_train_eval.ipynb index 1afca78..54771c8 100644 --- a/notebooks/jupyter/AttnWSiamese_train_eval.ipynb +++ b/notebooks/jupyter/AttnWSiamese_train_eval.ipynb @@ -18,8 +18,10 @@ "metadata": {}, "outputs": [], "source": [ - "import ddi\n", - "import sys" + "import os\n", + "import sys\n", + "# Provide access to modules in repo.\n", + "sys.path.insert(0, os.path.abspath('../../'))" ] }, { @@ -40,6 +42,7 @@ "metadata": {}, "outputs": [], "source": [ + "import ddi\n", "from ddi.dataset import *\n", "from ddi.utilities import *\n", "from ddi.run_workflow import *" @@ -79,7 +82,7 @@ "cell_type": "markdown", "metadata": {}, "source": [ - "### Preparing dataset " + "## Loading dataset " ] }, { @@ -91,8 +94,9 @@ "DSdataset_name = 'DS1' # or DS2, DS3\n", "\n", "# For DS3:\n", - "# interact_matfname_DS3 = 'NCRDInteractionMat'\n", - "interact_matfname_DS3 = 'CRDInteractionMat'\n", + "if DSdataset_name == 'DS3':\n", + "# interact_matfname_DS3 = 'NCRDInteractionMat'\n", + " interact_matfname_DS3 = 'CRDInteractionMat'\n", "\n", "train_Siamese = True" ] @@ -227,7 +231,7 @@ "cell_type": "markdown", "metadata": {}, "source": [ - "### Run from here" + "### Genearte data tensors" ] }, { @@ -308,6 +312,13 @@ "num_drugs" ] }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "### Model configuration" + ] + }, { "cell_type": "code", "execution_count": null, @@ -386,7 +397,7 @@ " similarity_type=exp_iden, \n", " model_name='Transformer', \n", " hyperparam_opt=hyperparam_opt,\n", - " loss_func='nllloss'\n", + " loss_func='nllloss',\n", " margin=margin_v, \n", " loss_w=loss_w)\n", "# mconfig, options = build_dditrf_config_map(input_dim=(num_drugs+1)*(len(similarity_types)+1)*2, \n", @@ -477,8 +488,13 @@ " 
q_process.join()\n", " print(\"<<< joined hyperparam search process\")\n", " \n", - "def create_q_process(datatensor_partition, config_map, tr_val_dir, fold_gpu_map):\n", - " return mp.Process(target=ddi.run_workflow.train_test_partition, args=(datatensor_partition, config_map, tr_val_dir, fold_gpu_map))" + "def create_q_process(gpu_num, datatensor_partition, config_map, tr_val_dir, fold_gpu_map, queue):\n", + " return mp.Process(target=ddi.run_workflow.train_test_partition, args=(gpu_num,\n", + " datatensor_partition, \n", + " config_map, \n", + " tr_val_dir, \n", + " fold_gpu_map,\n", + " queue))" ] }, { @@ -492,18 +508,6 @@ "datatensor_partitions" ] }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "start_part = 5\n", - "\n", - "fold_gpu_map = {(i+start_part):i for i in range (n_gpu)}\n", - "fold_gpu_map" - ] - }, { "cell_type": "code", "execution_count": null, @@ -515,14 +519,40 @@ "\n", "queue = mp.Queue()\n", "q_processes = []\n", + "num_folds = len(datatensor_partitions)\n", + "num_folds = 2\n", + "spawned_processes = min(n_gpu, num_folds)\n", + "\n", + "for q_i in range(spawned_processes):\n", + " \n", + " fold_gpu_map = {q_i:q_i}\n", + " q_process = create_q_process(gpu_num=q_i,\n", + " datatensor_partition={q_i:datatensor_partitions[q_i]},\n", + " config_map=config_map,\n", + " tr_val_dir=tr_val_dir,\n", + " fold_gpu_map=fold_gpu_map,\n", + " queue=queue)\n", "\n", - "for q_i in fold_gpu_map.keys():\n", - " q_process = create_q_process({q_i:datatensor_partitions[q_i]}, config_map, tr_val_dir, fold_gpu_map)\n", " q_processes.append(q_process)\n", " spawn_q_process(q_process)\n", " \n", - "for q_i in range(n_gpu):\n", - " join_q_process(q_processes[q_i])" + "for q_i in range(num_folds):\n", + " join_q_process(q_processes[q_i])\n", + " released_gpu_num = queue.get()\n", + " print(\"released_gpu_num:\", released_gpu_num)\n", + " if(spawned_processes < num_folds):\n", + " curr_fold = spawned_processes\n", + " fold_gpu_map = {curr_fold:released_gpu_num}\n", + " q_process = create_q_process(gpu_num=released_gpu_num,\n", + " datatensor_partition={curr_fold:datatensor_partitions[curr_fold]},\n", + " config_map=config_map,\n", + " tr_val_dir=tr_val_dir,\n", + " fold_gpu_map=fold_gpu_map,\n", + " queue=queue)\n", + "\n", + " q_processes.append(q_process)\n", + " spawn_q_process(q_process)\n", + " spawned_processes = spawned_processes + 1" ] }, { @@ -531,7 +561,10 @@ "metadata": {}, "outputs": [], "source": [ - "auc_df, aupr_df, f1_df= build_performance_dfs(similarity_types, os.path.relpath(tr_val_dir, '..'), num_folds, 'train')\n", + "auc_df, aupr_df, f1_df= build_performance_dfs(similarity_types, \n", + " os.path.relpath(tr_val_dir, '..'), \n", + " num_folds, \n", + " 'train')\n", "\n", "for perf_name, perf_df in (('auc', auc_df), ('aupr', aupr_df), ('f1', f1_df)):\n", " print(perf_name)\n", @@ -546,7 +579,10 @@ "metadata": {}, "outputs": [], "source": [ - "auc_df, aupr_df, f1_df= build_performance_dfs(similarity_types, os.path.relpath(tr_val_dir, '..'), num_folds, 'test')\n", + "auc_df, aupr_df, f1_df= build_performance_dfs(similarity_types, \n", + " os.path.relpath(tr_val_dir, '..'), \n", + " num_folds, \n", + " 'test')\n", "\n", "for perf_name, perf_df in (('auc', auc_df), ('aupr', aupr_df), ('f1', f1_df)):\n", " print(perf_name)\n", @@ -572,7 +608,7 @@ "name": "python", "nbconvert_exporter": "python", "pygments_lexer": "ipython3", - "version": "3.6.10" + "version": "3.8.5" } }, "nbformat": 4,