update workflow and loss computation
orisenbazuru committed Aug 24, 2020
1 parent 550a571 commit f614b19
Showing 1 changed file with 255 additions and 21 deletions.
276 changes: 255 additions & 21 deletions ddi/run_workflow.py
@@ -66,7 +66,7 @@ def __repr__(self):
self.num_epochs)
return desc

def generate_models_config(hyperparam_config, similarity_type, model_name, input_dim, fold_num, fdtype, loss_func='nllloss', margin=0.5):
def generate_models_config(hyperparam_config, similarity_type, model_name, input_dim, fold_num, fdtype, loss_func='nllloss', margin=0.5, loss_w=0.5):

dataloader_config = {'batch_size': hyperparam_config.batch_size,
'num_workers': 0}
@@ -84,11 +84,12 @@ def generate_models_config(hyperparam_config, similarity_type, model_name, input
'fdtype':fdtype,
'to_gpu':True,
'loss_func':loss_func,
'contrastiveloss_margin':margin}
'contrastiveloss_margin':margin,
'loss_w':loss_w}

return config, options

def build_custom_config_map(similarity_type, model_name, loss_func='nllloss', margin=0.5):
def build_custom_config_map(similarity_type, model_name, loss_func='nllloss', margin=0.5, loss_w=0.5):
if(model_name == 'NDD'):
hyperparam_config = NDDHyperparamConfig(400,300,0.5,0,200,20)
input_dim = 1096
@@ -97,7 +98,14 @@ def build_custom_config_map(similarity_type, model_name, loss_func='nllloss', ma
input_dim = 548
fold_num = -1
fdtype = torch.float32
mconfig, options = generate_models_config(hyperparam_config, similarity_type, model_name, input_dim, fold_num, fdtype, loss_func=loss_func, margin=margin)
mconfig, options = generate_models_config(hyperparam_config, similarity_type, model_name, input_dim, fold_num, fdtype, loss_func=loss_func, margin=margin, loss_w=loss_w)
return mconfig, options

def build_dditrf_config_map(input_dim, similarity_type, model_name, hyperparam_opt, loss_func='nllloss', margin=0.5, loss_w=0.5):
hyperparam_config = DDITrfHyperparamConfig(*hyperparam_opt)
fold_num = -1
fdtype = torch.float32
mconfig, options = generate_models_config(hyperparam_config, similarity_type, model_name, input_dim, fold_num, fdtype, loss_func=loss_func, margin=margin, loss_w=loss_w)
return mconfig, options
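
# Hedged usage sketch (not part of the commit): building a Transformer run config that
# carries the new loss_w option down into `options`. The positional order of
# hyperparam_opt is assumed to mirror generate_hyperparam_space() further below,
# similarity_type='ssp' is a made-up placeholder, and input_dim=548 mirrors the
# non-NDD branch of build_custom_config_map above.
import torch.nn as nn
from ddi.run_workflow import build_dditrf_config_map  # assumes the repo is importable as a package

hyperparam_opt = (None, 2, 2, 0.3, nn.ReLU(), 2, 'attn', 'cosine', 1e-4, 200, 50)
mconfig, options = build_dditrf_config_map(input_dim=548,
                                           similarity_type='ssp',
                                           model_name='Transformer',
                                           hyperparam_opt=hyperparam_opt,
                                           loss_func='nllloss',
                                           margin=0.5,
                                           loss_w=0.5)
# options['loss_w'] is later read in run_ddiTrf via options.get('loss_w', 0.5)
# to balance the classification loss against the contrastive loss.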

def dump_dict_content(dsettype_content_map, dsettypes, desc, wrk_dir):
@@ -110,10 +118,10 @@ def get_random_fold(num_folds, random_seed=42):
return fold_num

def hyperparam_model_search(data_partitions, similarity_type, model_name,
input_dim, root_dir, fold_gpu_map, loss_func='nllloss', margin=0.5,
input_dim, root_dir, fold_gpu_map,
loss_func='nllloss', margin=0.5, loss_w=0.5,
fdtype=torch.float32, num_epochs=25,
prob_interval_truemax=0.05, prob_estim=0.95, random_seed=42,
per_base=False):
prob_interval_truemax=0.05, prob_estim=0.95, random_seed=42):
# fold_num = get_random_run(len(data_partitions), random_seed=random_seed)
fold_num = get_random_fold(len(data_partitions), random_seed=random_seed)
dsettypes = ['train', 'validation']
@@ -127,7 +135,8 @@ def hyperparam_model_search(data_partitions, similarity_type, model_name,
fold_num,
fdtype,
loss_func=loss_func,
margin=margin)
margin=margin,
loss_w=loss_w)
options['num_epochs'] = num_epochs # override number of epochs here
print("Running {} config #{}".format(similarity_type, counter))
path = os.path.join(root_dir, 'fold_{}'.format(fold_num), 'config_{}'.format(counter))
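
# Hedged sketch (assumption -- the body of hyperparam_model_search is collapsed in this
# diff): the usual way prob_interval_truemax / prob_estim drive a random hyperparameter
# search is to sample enough configurations so that, with probability prob_estim, at
# least one lands in the top prob_interval_truemax fraction of the search space.
import numpy as np

def get_num_trials(prob_interval_truemax=0.05, prob_estim=0.95):
    # P(at least one of n samples in top q fraction) = 1 - (1 - q)^n >= prob_estim
    return int(np.ceil(np.log(1 - prob_estim) / np.log(1 - prob_interval_truemax)))

print(get_num_trials())  # 59 for the default values above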
@@ -202,7 +211,7 @@ def run_ddi(data_partition, dsettypes, config, options, wrk_dir,
# pytorch version >1.1, scheduler should be called after optimizer
# for cyclical lr scheduler, it should be called after each batch update
num_iter = len(data_loaders['train']) # num_train_samples/batch_size
c_step_size = int(np.ceil(5*num_iter)) # this should be 2-10 times num_iter
c_step_size = int(np.ceil(2*num_iter)) # this should be 2-10 times num_iter
base_lr = 3e-4
max_lr = 5*base_lr # 3-5 times base_lr
cyc_scheduler = torch.optim.lr_scheduler.CyclicLR(optimizer, base_lr, max_lr, step_size_up=c_step_size,
@@ -309,6 +318,232 @@ def run_ddi(data_partition, dsettypes, config, options, wrk_dir,



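# For context on the cyclical-LR setup changed in run_ddi above (c_step_size is now
# 2*num_iter rather than 5*num_iter), a hedged, self-contained sketch; the dummy model
# and num_iter value are placeholders, not part of the repository.
import numpy as np
import torch

model = torch.nn.Linear(10, 2)
optimizer = torch.optim.Adam(model.parameters(), lr=3e-4, weight_decay=1e-4)

num_iter = 100                              # len(data_loaders['train']) in the real code
c_step_size = int(np.ceil(2 * num_iter))    # recommended 2-10x the iterations per epoch
base_lr = 3e-4
max_lr = 5 * base_lr                        # recommended 3-5x base_lr
cyc_scheduler = torch.optim.lr_scheduler.CyclicLR(optimizer, base_lr, max_lr,
                                                  step_size_up=c_step_size,
                                                  mode='triangular',
                                                  cycle_momentum=False)
# for torch >= 1.1 the scheduler is stepped once per batch, after optimizer.step()
for _ in range(num_iter):
    optimizer.step()
    cyc_scheduler.step()
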
# def run_ddiTrf(data_partition, dsettypes, config, options, wrk_dir,
# state_dict_dir=None, to_gpu=True, gpu_index=0):
# pid = "{}".format(os.getpid()) # process id description
# # get data loader config
# dataloader_config = config['dataloader_config']
# cld = construct_load_dataloaders(data_partition, dsettypes, dataloader_config, wrk_dir)
# # dictionaries by dsettypes
# data_loaders, epoch_loss_avgbatch, score_dict, class_weights, flog_out = cld
# print(flog_out)
# # print(class_weights)
# device = get_device(to_gpu, gpu_index) # gpu device
# fdtype = options['fdtype']

# if('train' in class_weights):
# class_weights = class_weights['train'].type(fdtype).to(device) # update class weights to fdtype tensor
# else:
# class_weights = torch.tensor([1]*2).type(fdtype).to(device) # weighting all cases equally

# print("class weights", class_weights)
# loss_func = torch.nn.NLLLoss(weight=class_weights, reduction='mean') # negative log likelihood loss
# loss_contrastive = ContrastiveLoss(options.get('contrastiveloss_margin', 0.5), reduction='mean')
# # loss_contrastive = CosEmbLoss(options.get('contrastiveloss_margin', 0.5), reduction='mean')
# loss_contrastive.type(fdtype).to(device)
# # loss_attn = FeatureEmbAttention(1)
# # loss_attn.type(fdtype).to(device)

# num_epochs = options.get('num_epochs', 50)
# fold_num = options.get('fold_num')

# # parse config dict
# model_config = config['model_config']
# model_name = options['model_name']


# if(model_name == 'Transformer'):
# ddi_model = DDI_Transformer(input_size=options['input_dim'],
# input_embed_dim=model_config.input_embed_dim,
# num_attn_heads=model_config.num_attn_heads,
# mlp_embed_factor=model_config.mlp_embed_factor,
# nonlin_func=model_config.nonlin_func,
# pdropout=model_config.p_dropout,
# num_transformer_units=model_config.num_transformer_units,
# pooling_mode=model_config.pooling_mode)
# ddi_siamese = DDI_SiameseTrf(options['input_dim'],model_config.dist_opt, num_classes=2)

# # ddi_siamese = DDI_SiameseTrf(model_config.input_embed_dim,model_config.dist_opt, num_classes=2)


# # define optimizer and group parameters
# models_param = list(ddi_model.parameters()) + list(ddi_siamese.parameters())
# models = [(ddi_model, model_name), (ddi_siamese, f'{model_name}_Siamese')]

# if(state_dict_dir): # load state dictionary of saved models
# for m, m_name in models:
# m.load_state_dict(torch.load(os.path.join(state_dict_dir, '{}.pkl'.format(m_name)), map_location=device))

# # update models fdtype and move to device
# for m, m_name in models:
# m.type(fdtype).to(device)

# print('cool')
# if('train' in data_loaders):
# weight_decay = options.get('weight_decay', 1e-4)
# print('weight_decay', weight_decay)
# # split model params into attn parameters and other params
# # models_param = add_weight_decay_except_attn([ddi_model, ddi_siamese], weight_decay)
# # see paper Cyclical Learning Rates for Training Neural Networks for parameters' choice
# # `https://arxiv.org/pdf/1506.01186.pdf`
# # pytorch version >1.1, scheduler should be called after optimizer
# # for cyclical lr scheduler, it should be called after each batch update
# num_iter = len(data_loaders['train']) # num_train_samples/batch_size
# c_step_size = int(np.ceil(5*num_iter)) # this should be 2-10 times num_iter
# base_lr = 3e-4
# max_lr = 5*base_lr # 3-5 times base_lr
# print('max lr', max_lr)
# base_lr = 1e-2
# print('base_lr', base_lr)
# optimizer = torch.optim.Adam(models_param, weight_decay=weight_decay, lr=base_lr)
# # optimizer = torch.optim.Adam(models_param, lr=base_lr)
# # cyc_scheduler = torch.optim.lr_scheduler.CyclicLR(optimizer, base_lr, max_lr, step_size_up=c_step_size,
# # mode='triangular', cycle_momentum=False)
# # scheduler = torch.optim.lr_scheduler.OneCycleLR(optimizer, max_lr=max_lr,
# # steps_per_epoch=num_iter,
# # epochs=num_epochs)
# scheduler = torch.optim.lr_scheduler.ReduceLROnPlateau(optimizer, 'max', patience=1, verbose=True)

# if ('validation' in data_loaders):
# m_state_dict_dir = create_directory(os.path.join(wrk_dir, 'model_statedict'))

# if(num_epochs > 1):
# fig_dir = create_directory(os.path.join(wrk_dir, 'figures'))

# # dump config dictionaries on disk
# config_dir = create_directory(os.path.join(wrk_dir, 'config'))
# ReaderWriter.dump_data(config, os.path.join(config_dir, 'mconfig.pkl'))
# ReaderWriter.dump_data(options, os.path.join(config_dir, 'exp_options.pkl'))
# # store attention weights for validation and test set
# seqid_fattnw_map = {dsettype: {'X_a':{}, 'X_b':{}} for dsettype in data_loaders if dsettype in {'test'}}
# pair_names = ('a', 'b')

# for epoch in range(num_epochs):
# # print("-"*35)
# for dsettype in dsettypes:
# print("device: {} | similarity_type: {} | fold_num: {} | epoch: {} | dsettype: {} | pid: {}"
# "".format(device, options.get('similarity_type'), fold_num, epoch, dsettype, pid))
# pred_class = []
# ref_class = []
# prob_scores = []
# ddi_ids = []
# data_loader = data_loaders[dsettype]
# # total_num_samples = len(data_loader.dataset)
# epoch_loss = 0.

# if(dsettype == 'train'): # should be only for train
# for m, m_name in models:
# m.train()
# else:
# for m, m_name in models:
# m.eval()

# for i_batch, samples_batch in enumerate(data_loader):
# print('batch num:', i_batch)

# # zero model grad
# if(dsettype == 'train'):
# optimizer.zero_grad()

# X_a, X_b, y_batch, ids = samples_batch
# # print(y_batch.shape)

# X_a = X_a.to(device)
# X_b = X_b.to(device)
# y_batch = y_batch.reshape(-1) # TODO: reshape when preprocessing feature

# y_batch = y_batch.type(torch.int64).to(device)
# # print('ids', ids.shape, ids.dtype)

# with torch.set_grad_enabled(dsettype == 'train'):
# # print("number of examples in batch:", docs_batch.size(0))
# num_samples_perbatch = X_a.size(0)
# # print("number_samples_per_batch", num_samples_perbatch)
# z_a, fattn_w_scores_a = ddi_model(X_a)
# z_b, fattn_w_scores_b = ddi_model(X_b)

# if(dsettype in seqid_fattnw_map and model_config.pooling_mode == 'attn'):
# for l, attn_scores in enumerate((fattn_w_scores_a, fattn_w_scores_b)):
# suffix = pair_names[l]
# seqid_fattnw_map[dsettype][f'X_{suffix}'].update({sid.item():attn_scores[c].detach().cpu() for c, sid in enumerate(ids)})


# logsoftmax_scores, dist = ddi_siamese(z_a, z_b)

# __, y_pred_clss = torch.max(logsoftmax_scores, -1)

# y_pred_prob = torch.exp(logsoftmax_scores.detach().cpu()).numpy()

# # print(y_pred_prob.shape)
# pred_class.extend(y_pred_clss.view(-1).tolist())
# ref_class.extend(y_batch.view(-1).tolist())
# prob_scores.append(y_pred_prob)
# # print(prob_scores)
# ddi_ids.extend(ids.tolist())

# cl = loss_func(logsoftmax_scores, y_batch)

# dl = loss_contrastive(dist.reshape(-1), y_batch.type(fdtype))
# # print(cl)
# # print('cl', cl.shape)
# # print('dl', dl.shape)
# # cl.unsqueeze_(-1).unsqueeze_(-1)
# # dl.unsqueeze_(-1).unsqueeze_(-1)
# # # print('cl', cl.shape)
# # loss, __ = loss_attn(torch.cat([cl,dl], axis=1))
# # loss = loss.mean()
# # # print(loss)

# loss = cl + dl
# # loss = cl
# # loss = 0.8*loss_func(logsoftmax_scores, y_batch) + 0.2*loss_contrastive(dist.reshape(-1), y_batch)
# # loss = loss_func(logsoftmax_scores, y_batch)

# if(dsettype == 'train'):
# # print("computing loss")
# # backward step (i.e. compute gradients)
# loss.backward()
# # optimizer step -- update weights
# optimizer.step()

# epoch_loss += loss.item()

# # torch.cuda.ipc_collect()
# # torch.cuda.empty_cache()
# # end of epoch
# # print("+"*35)
# epoch_loss_avgbatch[dsettype].append(epoch_loss/len(data_loader))


# prob_scores_arr = np.concatenate(prob_scores, axis=0)
# # print(prob_scores_arr.shape)
# modelscore = perfmetric_report(pred_class, ref_class, prob_scores_arr[:,1], epoch, flog_out[dsettype])

# perf = modelscore.s_aupr
# if dsettype == 'validation':
# scheduler.step(perf)
# print('scheduler step for perf', perf)

# best_rec_score = score_dict[dsettype].s_aupr
# if(perf > best_rec_score):
# score_dict[dsettype] = modelscore
# if(dsettype == 'validation'):
# for m, m_name in models:
# torch.save(m.state_dict(), os.path.join(m_state_dict_dir, '{}.pkl'.format(m_name)))
# elif(dsettype == 'test'):
# # dump attention weights for the test data
# dump_dict_content(seqid_fattnw_map, ['test'], 'sampleid_fattnw_map', wrk_dir)
# if dsettype in {'test', 'validation'}:
# predictions_df = build_predictions_df(ddi_ids, ref_class, pred_class, prob_scores_arr)
# predictions_path = os.path.join(wrk_dir, f'predictions_{dsettype}.csv')
# predictions_df.to_csv(predictions_path)

# if(num_epochs > 1):
# plot_loss(epoch_loss_avgbatch, fig_dir)
# # dump_scores
# dump_dict_content(score_dict, list(score_dict.keys()), 'score', wrk_dir)


def run_ddiTrf(data_partition, dsettypes, config, options, wrk_dir,
state_dict_dir=None, to_gpu=True, gpu_index=0):
pid = "{}".format(os.getpid()) # process id description
@@ -330,10 +565,8 @@ def run_ddiTrf(data_partition, dsettypes, config, options, wrk_dir,
print("class weights", class_weights)
loss_func = torch.nn.NLLLoss(weight=class_weights, reduction='mean') # negative log likelihood loss
loss_contrastive = ContrastiveLoss(options.get('contrastiveloss_margin', 0.5), reduction='mean')
# loss_contrastive = CosEmbLoss(options.get('contrastiveloss_margin', 0.5), reduction='mean')
loss_contrastive.type(fdtype).to(device)
# loss_attn = FeatureEmbAttention(1)
# loss_attn.type(fdtype).to(device)
loss_w = options.get('loss_w', 0.5)

num_epochs = options.get('num_epochs', 50)
fold_num = options.get('fold_num')
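
# As a reference for the loss terms combined further below, a hedged sketch of one
# standard contrastive-loss formulation with the same call signature, loss(dist, y),
# where y=1 marks a positive pair. The repository's actual ContrastiveLoss is not shown
# in this diff and may differ in label convention or scaling.
import torch
import torch.nn as nn

class ContrastiveLossSketch(nn.Module):
    def __init__(self, margin=0.5, reduction='mean'):
        super().__init__()
        self.margin = margin
        self.reduction = reduction

    def forward(self, dist, y):
        # pull positive pairs together, push negative pairs at least `margin` apart
        loss = y * dist.pow(2) + (1 - y) * torch.clamp(self.margin - dist, min=0).pow(2)
        return loss.mean() if self.reduction == 'mean' else loss.sum()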
@@ -368,7 +601,8 @@ def run_ddiTrf(data_partition, dsettypes, config, options, wrk_dir,
# update models fdtype and move to device
for m, m_name in models:
m.type(fdtype).to(device)


print('cool')
if('train' in data_loaders):
weight_decay = options.get('weight_decay', 1e-4)
print('weight_decay', weight_decay)
@@ -478,7 +712,7 @@ def run_ddiTrf(data_partition, dsettypes, config, options, wrk_dir,
# loss = loss.mean()
# # print(loss)

loss = cl + dl
loss = loss_w*cl + (1-loss_w)*dl
# loss = cl
# loss = 0.8*loss_func(logsoftmax_scores, y_batch) + 0.2*loss_contrastive(dist.reshape(-1), y_batch)
# loss = loss_func(logsoftmax_scores, y_batch)
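
# The main change of this commit is the weighted objective above: instead of an
# unweighted sum, the classification term (cl) and the contrastive term (dl) are mixed
# by loss_w, read earlier from options. A tiny numeric illustration with made-up values:
loss_w = 0.5
cl, dl = 0.9, 0.3                          # example per-batch loss values
loss = loss_w * cl + (1 - loss_w) * dl     # 0.6 for these numbers
# loss_w = 1.0 recovers pure classification; loss_w = 0.0 trains on the contrastive term only.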
@@ -550,17 +784,17 @@ def generate_hyperparam_space(model_name):
opt_lst = [fc1_dim, fc2_dim, dropout_vals, l2_reg_vals, batch_size_vals, num_epochs_vals]
elif(model_name == 'Transformer'):
# TODO: add the possible options for transformer model
embed_dim = [16,32,64,128]
num_attn_heads = [4,6,8]
num_transformer_units = [2]
embed_dim = [None]
num_attn_heads = [1,2]
num_transformer_units = [1,2]
p_dropout = [0.1, 0.3, 0.5]
nonlin_func = [nn.ReLU()]
mlp_embed_factor = [2]
pooling_mode = ['attn']
dist_opt = ['euclidean']
l2_reg = [1e-4, 1e-3, 1e-2]
batch_size = [4000]
num_epochs = [25]
dist_opt = ['cosine']
l2_reg = [1e-4, 1e-3, 1e-5]
batch_size = [200, 2500]
num_epochs = [50]
opt_lst = [embed_dim, num_attn_heads,
num_transformer_units, p_dropout,
nonlin_func, mlp_embed_factor, pooling_mode, dist_opt,

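# Hedged sketch (assumption): the collapsed tail of generate_hyperparam_space presumably
# expands opt_lst into all combinations, e.g. with itertools.product. For the updated
# Transformer lists above this yields 1*2*2*3*1*1*1*1*3*2*1 = 72 candidate configurations.
from itertools import product
import torch.nn as nn

embed_dim = [None]
num_attn_heads = [1, 2]
num_transformer_units = [1, 2]
p_dropout = [0.1, 0.3, 0.5]
nonlin_func = [nn.ReLU()]
mlp_embed_factor = [2]
pooling_mode = ['attn']
dist_opt = ['cosine']
l2_reg = [1e-4, 1e-3, 1e-5]
batch_size = [200, 2500]
num_epochs = [50]

opt_lst = [embed_dim, num_attn_heads, num_transformer_units, p_dropout,
           nonlin_func, mlp_embed_factor, pooling_mode, dist_opt,
           l2_reg, batch_size, num_epochs]
hyperparam_space = list(product(*opt_lst))
print(len(hyperparam_space))  # 72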